End of training

- all_results.json +14 -0
- debugger_ovh_transformers.ipynb +195 -1
- eval_results.json +9 -0
- train_results.json +8 -0
- trainer_state.json +25 -0
all_results.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "epoch": 0.0,
+    "eval_loss": 140.0673828125,
+    "eval_runtime": 217.9185,
+    "eval_samples": 9184,
+    "eval_samples_per_second": 42.144,
+    "eval_steps_per_second": 5.268,
+    "eval_wer": 1.119321698229979,
+    "train_loss": 67.9575927734375,
+    "train_runtime": 3.7102,
+    "train_samples": 30002,
+    "train_samples_per_second": 5.391,
+    "train_steps_per_second": 2.695
+}
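For context: all_results.json is the merged metrics file that `transformers.Trainer` maintains alongside the per-split files added below. A minimal sketch of the calls that produce these files, assuming the stock `run_speech_recognition_ctc.py` flow (the configured `trainer` object is not shown in this diff):

```python
# Sketch, assuming a configured Trainer from run_speech_recognition_ctc.py.
# save_metrics(split, ...) writes {split}_results.json and, by default
# (combined=True), also merges the keys into all_results.json.
train_result = trainer.train()
trainer.log_metrics("train", train_result.metrics)   # prints "***** train metrics *****"
trainer.save_metrics("train", train_result.metrics)  # train_results.json + all_results.json
trainer.save_state()                                 # trainer_state.json

eval_metrics = trainer.evaluate()
trainer.log_metrics("eval", eval_metrics)            # prints "***** eval metrics *****"
trainer.save_metrics("eval", eval_metrics)           # eval_results.json + all_results.json
```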
debugger_ovh_transformers.ipynb
CHANGED
@@ -426,7 +426,201 @@
     "\n",
     "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at hf-test/xls-r-dummy and are newly initialized: ['lm_head.bias', 'lm_head.weight']\n",
     "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
-    "preprocess datasets: 
+    "preprocess datasets: 30002ex [03:40, 136.35ex/s]\n",
+    "preprocess datasets: 9184ex [01:06, 137.56ex/s]\n",
+    "100%|██████████████████████████████████████████| 31/31 [00:00<00:00, 809.78ba/s]\n",
+    "100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 874.45ba/s]\n",
+    "Configuration saved in ./preprocessor_config.json\n",
+    "tokenizer config file saved in ./tokenizer_config.json\n",
+    "Special tokens file saved in ./special_tokens_map.json\n",
+    "added tokens file saved in ./added_tokens.json\n",
+    "Configuration saved in ./config.json\n",
+    "loading feature extractor configuration file ./preprocessor_config.json\n",
+    "loading configuration file ./config.json\n",
+    "Model config Wav2Vec2Config {\n",
+    " \"_name_or_path\": \"./\",\n",
+    " \"activation_dropout\": 0.0,\n",
+    " \"adapter_kernel_size\": 3,\n",
+    " \"adapter_stride\": 2,\n",
+    " \"add_adapter\": false,\n",
+    " \"apply_spec_augment\": true,\n",
+    " \"architectures\": [\n",
+    " \"Wav2Vec2Model\"\n",
+    " ],\n",
+    " \"attention_dropout\": 0.0,\n",
+    " \"bos_token_id\": 1,\n",
+    " \"classifier_proj_size\": 256,\n",
+    " \"codevector_dim\": 256,\n",
+    " \"contrastive_logits_temperature\": 0.1,\n",
+    " \"conv_bias\": false,\n",
+    " \"conv_dim\": [\n",
+    " 32,\n",
+    " 32,\n",
+    " 32\n",
+    " ],\n",
+    " \"conv_kernel\": [\n",
+    " 8,\n",
+    " 8,\n",
+    " 8\n",
+    " ],\n",
+    " \"conv_stride\": [\n",
+    " 4,\n",
+    " 4,\n",
+    " 4\n",
+    " ],\n",
+    " \"ctc_loss_reduction\": \"mean\",\n",
+    " \"ctc_zero_infinity\": false,\n",
+    " \"diversity_loss_weight\": 0.1,\n",
+    " \"do_stable_layer_norm\": true,\n",
+    " \"eos_token_id\": 2,\n",
+    " \"feat_extract_activation\": \"gelu\",\n",
+    " \"feat_extract_dropout\": 0.0,\n",
+    " \"feat_extract_norm\": \"layer\",\n",
+    " \"feat_proj_dropout\": 0.0,\n",
+    " \"feat_quantizer_dropout\": 0.0,\n",
+    " \"final_dropout\": 0.0,\n",
+    " \"hidden_act\": \"gelu\",\n",
+    " \"hidden_dropout\": 0.0,\n",
+    " \"hidden_dropout_prob\": 0.1,\n",
+    " \"hidden_size\": 16,\n",
+    " \"initializer_range\": 0.02,\n",
+    " \"intermediate_size\": 20,\n",
+    " \"layer_norm_eps\": 1e-05,\n",
+    " \"layerdrop\": 0.0,\n",
+    " \"mask_feature_length\": 10,\n",
+    " \"mask_feature_min_masks\": 0,\n",
+    " \"mask_feature_prob\": 0.0,\n",
+    " \"mask_time_length\": 10,\n",
+    " \"mask_time_min_masks\": 2,\n",
+    " \"mask_time_prob\": 0.05,\n",
+    " \"model_type\": \"wav2vec2\",\n",
+    " \"num_adapter_layers\": 3,\n",
+    " \"num_attention_heads\": 2,\n",
+    " \"num_codevector_groups\": 2,\n",
+    " \"num_codevectors_per_group\": 320,\n",
+    " \"num_conv_pos_embedding_groups\": 2,\n",
+    " \"num_conv_pos_embeddings\": 16,\n",
+    " \"num_feat_extract_layers\": 3,\n",
+    " \"num_hidden_layers\": 4,\n",
+    " \"num_negatives\": 10,\n",
+    " \"output_hidden_size\": 16,\n",
+    " \"pad_token_id\": 51,\n",
+    " \"proj_codevector_dim\": 256,\n",
+    " \"tdnn_dilation\": [\n",
+    " 1,\n",
+    " 2,\n",
+    " 3,\n",
+    " 1,\n",
+    " 1\n",
+    " ],\n",
+    " \"tdnn_dim\": [\n",
+    " 512,\n",
+    " 512,\n",
+    " 512,\n",
+    " 512,\n",
+    " 1500\n",
+    " ],\n",
+    " \"tdnn_kernel\": [\n",
+    " 5,\n",
+    " 3,\n",
+    " 3,\n",
+    " 1,\n",
+    " 1\n",
+    " ],\n",
+    " \"torch_dtype\": \"float32\",\n",
+    " \"transformers_version\": \"4.17.0.dev0\",\n",
+    " \"use_weighted_layer_sum\": false,\n",
+    " \"vocab_size\": 54,\n",
+    " \"xvector_output_dim\": 512\n",
+    "}\n",
+    "\n",
+    "loading feature extractor configuration file ./preprocessor_config.json\n",
+    "Feature extractor Wav2Vec2FeatureExtractor {\n",
+    " \"do_normalize\": true,\n",
+    " \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n",
+    " \"feature_size\": 1,\n",
+    " \"padding_side\": \"right\",\n",
+    " \"padding_value\": 0.0,\n",
+    " \"return_attention_mask\": false,\n",
+    " \"sampling_rate\": 16000\n",
+    "}\n",
+    "\n",
+    "Didn't find file ./tokenizer.json. We won't load it.\n",
+    "loading file ./vocab.json\n",
+    "loading file ./tokenizer_config.json\n",
+    "loading file ./added_tokens.json\n",
+    "loading file ./special_tokens_map.json\n",
+    "loading file None\n",
+    "Adding <s> to the vocabulary\n",
+    "Adding </s> to the vocabulary\n",
+    "/workspace/xls-r-ab-test/./ is already a clone of https://huggingface.co/masapasa/xls-r-ab-test. Make sure you pull the latest changes with `repo.git_pull()`.\n",
+    "01/31/2022 17:18:19 - WARNING - huggingface_hub.repository - /workspace/xls-r-ab-test/./ is already a clone of https://huggingface.co/masapasa/xls-r-ab-test. Make sure you pull the latest changes with `repo.git_pull()`.\n",
+    "max_steps is given, it will override any value given in num_train_epochs\n",
+    "Using amp half precision backend\n",
+    "The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+    "/opt/conda/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+    " warnings.warn(\n",
+    "***** Running training *****\n",
+    " Num examples = 30002\n",
+    " Num Epochs = 1\n",
+    " Instantaneous batch size per device = 2\n",
+    " Total train batch size (w. parallel, distributed & accumulation) = 2\n",
+    " Gradient Accumulation steps = 1\n",
+    " Total optimization steps = 10\n",
+    " 50%|█████████████████████                     | 5/10 [00:00<00:00, 10.35it/s]Saving model checkpoint to ./checkpoint-5\n",
+    "Configuration saved in ./checkpoint-5/config.json\n",
+    "Model weights saved in ./checkpoint-5/pytorch_model.bin\n",
+    "Configuration saved in ./checkpoint-5/preprocessor_config.json\n",
+    "Configuration saved in ./preprocessor_config.json\n",
+    " 90%|████████████████████████████████████████  | 9/10 [00:03<00:00, 2.20it/s]Saving model checkpoint to ./checkpoint-10\n",
+    "Configuration saved in ./checkpoint-10/config.json\n",
+    "Model weights saved in ./checkpoint-10/pytorch_model.bin\n",
+    "Configuration saved in ./checkpoint-10/preprocessor_config.json\n",
+    "Deleting older checkpoint [checkpoint-5] due to args.save_total_limit\n",
+    "\n",
+    "\n",
+    "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+    "\n",
+    "\n",
+    "{'train_runtime': 3.7102, 'train_samples_per_second': 5.391, 'train_steps_per_second': 2.695, 'train_loss': 67.9575927734375, 'epoch': 0.0}\n",
+    "100%|███████████████████████████████████████████| 10/10 [00:03<00:00, 2.70it/s]\n",
+    "Saving model checkpoint to ./\n",
+    "Configuration saved in ./config.json\n",
+    "Model weights saved in ./pytorch_model.bin\n",
+    "Configuration saved in ./preprocessor_config.json\n",
+    "Saving model checkpoint to ./\n",
+    "Configuration saved in ./config.json\n",
+    "Model weights saved in ./pytorch_model.bin\n",
+    "Configuration saved in ./preprocessor_config.json\n",
+    "Several commits (2) will be pushed upstream.\n",
+    "01/31/2022 17:18:26 - WARNING - huggingface_hub.repository - Several commits (2) will be pushed upstream.\n",
+    "The progress bars may be unreliable.\n",
+    "01/31/2022 17:18:26 - WARNING - huggingface_hub.repository - The progress bars may be unreliable.\n",
+    "Everything up-to-date\n",
+    "\n",
+    "01/31/2022 17:18:27 - WARNING - huggingface_hub.repository - Everything up-to-date\n",
+    "\n",
+    "Dropping the following result as it does not have all the necessary fields:\n",
+    "{'dataset': {'name': 'common_voice', 'type': 'common_voice', 'args': 'ab'}}\n",
+    "To https://huggingface.co/masapasa/xls-r-ab-test\n",
+    "   b50c32e..4e53539 main -> main\n",
+    "\n",
+    "01/31/2022 17:18:33 - WARNING - huggingface_hub.repository - To https://huggingface.co/masapasa/xls-r-ab-test\n",
+    "   b50c32e..4e53539 main -> main\n",
+    "\n",
+    "***** train metrics *****\n",
+    " epoch = 0.0\n",
+    " train_loss = 67.9576\n",
+    " train_runtime = 0:00:03.71\n",
+    " train_samples = 30002\n",
+    " train_samples_per_second = 5.391\n",
+    " train_steps_per_second = 2.695\n",
+    "01/31/2022 17:18:36 - INFO - __main__ - *** Evaluate ***\n",
+    "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+    "***** Running Evaluation *****\n",
+    " Num examples = 9184\n",
+    " Batch size = 8\n",
+    " 68%|███████████████████████████               | 777/1148 [02:07<01:06, 5.55it/s]"
     ]
    }
   ],
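The run parameters can be read off the log above. As a hedged reconstruction (every value below is inferred from the output, not copied from the actual notebook cell), the TrainingArguments consistent with this run would look roughly like:

```python
from transformers import TrainingArguments

# Hypothetical reconstruction -- values inferred from the log above.
args = TrainingArguments(
    output_dir="./",                # checkpoints land in ./checkpoint-5, ./checkpoint-10
    per_device_train_batch_size=2,  # "Instantaneous batch size per device = 2"
    per_device_eval_batch_size=8,   # "Batch size = 8" during evaluation
    max_steps=10,                   # "Total optimization steps = 10"; overrides num_train_epochs
    save_steps=5,                   # checkpoints appear at steps 5 and 10
    save_total_limit=1,             # "Deleting older checkpoint [checkpoint-5]"
    fp16=True,                      # "Using amp half precision backend"
    push_to_hub=True,               # commits pushed to masapasa/xls-r-ab-test
)
```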
eval_results.json
ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 0.0,
+    "eval_loss": 140.0673828125,
+    "eval_runtime": 217.9185,
+    "eval_samples": 9184,
+    "eval_samples_per_second": 42.144,
+    "eval_steps_per_second": 5.268,
+    "eval_wer": 1.119321698229979
+}
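Note that eval_wer is about 1.12, i.e. above 100%. That is not a bug: WER = (substitutions + deletions + insertions) / reference words, so a model that inserts spurious tokens, as this freshly initialized lm_head does, can exceed 1.0. A quick sanity check with the jiwer package (hypothetical strings, assuming jiwer is installed):

```python
import jiwer

# An over-generating hypothesis yields more errors than reference words:
# 2 substitutions + 4 insertions over a 2-word reference -> WER = 3.0.
reference = "hello world"
hypothesis = "a b c d e f"
print(jiwer.wer(reference, hypothesis))  # 3.0
```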
train_results.json
ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 0.0,
+    "train_loss": 67.9575927734375,
+    "train_runtime": 3.7102,
+    "train_samples": 30002,
+    "train_samples_per_second": 5.391,
+    "train_steps_per_second": 2.695
+}
trainer_state.json
ADDED
@@ -0,0 +1,25 @@
+{
+    "best_metric": null,
+    "best_model_checkpoint": null,
+    "epoch": 0.0006666222251849877,
+    "global_step": 10,
+    "is_hyper_param_search": false,
+    "is_local_process_zero": true,
+    "is_world_process_zero": true,
+    "log_history": [
+        {
+            "epoch": 0.0,
+            "step": 10,
+            "total_flos": 334514838528.0,
+            "train_loss": 67.9575927734375,
+            "train_runtime": 3.7102,
+            "train_samples_per_second": 5.391,
+            "train_steps_per_second": 2.695
+        }
+    ],
+    "max_steps": 10,
+    "num_train_epochs": 1,
+    "total_flos": 334514838528.0,
+    "trial_name": null,
+    "trial_params": null
+}
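The fractional epoch value checks out against the training log: 30,002 examples at an effective batch size of 2 give 15,001 optimizer steps per epoch, and the run stopped at global_step 10. A one-line verification of that arithmetic:

```python
# 10 of 15001 steps per epoch reproduces the stored epoch fraction.
steps_per_epoch = 30002 // 2   # effective batch size 2, no gradient accumulation
print(10 / steps_per_epoch)    # ~0.0006666222251849877, the "epoch" field above
```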