masapasa committed
Commit 053cbec
• 1 Parent(s): 4e53539

End of training
all_results.json ADDED
@@ -0,0 +1,14 @@
+ {
+     "epoch": 0.0,
+     "eval_loss": 140.0673828125,
+     "eval_runtime": 217.9185,
+     "eval_samples": 9184,
+     "eval_samples_per_second": 42.144,
+     "eval_steps_per_second": 5.268,
+     "eval_wer": 1.119321698229979,
+     "train_loss": 67.9575927734375,
+     "train_runtime": 3.7102,
+     "train_samples": 30002,
+     "train_samples_per_second": 5.391,
+     "train_steps_per_second": 2.695
+ }
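The metrics above are the merged train + eval numbers the Trainer dumps at the end of a run. A minimal sketch of how files like this are typically produced (assuming a `trainer` configured as in the `run_speech_recognition_ctc.py` example script; the variable names are illustrative):

```python
# Sketch: how all_results.json, train_results.json, eval_results.json and
# trainer_state.json are typically written by Trainer-based example scripts.
# `trainer` is assumed to be an already-configured transformers.Trainer.
train_result = trainer.train()
trainer.log_metrics("train", train_result.metrics)   # prints "***** train metrics *****"
trainer.save_metrics("train", train_result.metrics)  # train_results.json (+ all_results.json)
trainer.save_state()                                 # trainer_state.json

eval_metrics = trainer.evaluate()
trainer.log_metrics("eval", eval_metrics)            # prints "***** eval metrics *****"
trainer.save_metrics("eval", eval_metrics)           # eval_results.json (+ all_results.json)
```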
debugger_ovh_transformers.ipynb CHANGED
@@ -426,7 +426,201 @@
  "\n",
  "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at hf-test/xls-r-dummy and are newly initialized: ['lm_head.bias', 'lm_head.weight']\n",
  "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
- "preprocess datasets: 26930ex [03:18, 192.81ex/s]"
+ "preprocess datasets: 30002ex [03:40, 136.35ex/s]\n",
+ "preprocess datasets: 9184ex [01:06, 137.56ex/s]\n",
+ "100%|██████████████████████████████████████████| 31/31 [00:00<00:00, 809.78ba/s]\n",
+ "100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 874.45ba/s]\n",
+ "Configuration saved in ./preprocessor_config.json\n",
+ "tokenizer config file saved in ./tokenizer_config.json\n",
+ "Special tokens file saved in ./special_tokens_map.json\n",
+ "added tokens file saved in ./added_tokens.json\n",
+ "Configuration saved in ./config.json\n",
+ "loading feature extractor configuration file ./preprocessor_config.json\n",
+ "loading configuration file ./config.json\n",
+ "Model config Wav2Vec2Config {\n",
+ "  \"_name_or_path\": \"./\",\n",
+ "  \"activation_dropout\": 0.0,\n",
+ "  \"adapter_kernel_size\": 3,\n",
+ "  \"adapter_stride\": 2,\n",
+ "  \"add_adapter\": false,\n",
+ "  \"apply_spec_augment\": true,\n",
+ "  \"architectures\": [\n",
+ "    \"Wav2Vec2Model\"\n",
+ "  ],\n",
+ "  \"attention_dropout\": 0.0,\n",
+ "  \"bos_token_id\": 1,\n",
+ "  \"classifier_proj_size\": 256,\n",
+ "  \"codevector_dim\": 256,\n",
+ "  \"contrastive_logits_temperature\": 0.1,\n",
+ "  \"conv_bias\": false,\n",
+ "  \"conv_dim\": [\n",
+ "    32,\n",
+ "    32,\n",
+ "    32\n",
+ "  ],\n",
+ "  \"conv_kernel\": [\n",
+ "    8,\n",
+ "    8,\n",
+ "    8\n",
+ "  ],\n",
+ "  \"conv_stride\": [\n",
+ "    4,\n",
+ "    4,\n",
+ "    4\n",
+ "  ],\n",
+ "  \"ctc_loss_reduction\": \"mean\",\n",
+ "  \"ctc_zero_infinity\": false,\n",
+ "  \"diversity_loss_weight\": 0.1,\n",
+ "  \"do_stable_layer_norm\": true,\n",
+ "  \"eos_token_id\": 2,\n",
+ "  \"feat_extract_activation\": \"gelu\",\n",
+ "  \"feat_extract_dropout\": 0.0,\n",
+ "  \"feat_extract_norm\": \"layer\",\n",
+ "  \"feat_proj_dropout\": 0.0,\n",
+ "  \"feat_quantizer_dropout\": 0.0,\n",
+ "  \"final_dropout\": 0.0,\n",
+ "  \"hidden_act\": \"gelu\",\n",
+ "  \"hidden_dropout\": 0.0,\n",
+ "  \"hidden_dropout_prob\": 0.1,\n",
+ "  \"hidden_size\": 16,\n",
+ "  \"initializer_range\": 0.02,\n",
+ "  \"intermediate_size\": 20,\n",
+ "  \"layer_norm_eps\": 1e-05,\n",
+ "  \"layerdrop\": 0.0,\n",
+ "  \"mask_feature_length\": 10,\n",
+ "  \"mask_feature_min_masks\": 0,\n",
+ "  \"mask_feature_prob\": 0.0,\n",
+ "  \"mask_time_length\": 10,\n",
+ "  \"mask_time_min_masks\": 2,\n",
+ "  \"mask_time_prob\": 0.05,\n",
+ "  \"model_type\": \"wav2vec2\",\n",
+ "  \"num_adapter_layers\": 3,\n",
+ "  \"num_attention_heads\": 2,\n",
+ "  \"num_codevector_groups\": 2,\n",
+ "  \"num_codevectors_per_group\": 320,\n",
+ "  \"num_conv_pos_embedding_groups\": 2,\n",
+ "  \"num_conv_pos_embeddings\": 16,\n",
+ "  \"num_feat_extract_layers\": 3,\n",
+ "  \"num_hidden_layers\": 4,\n",
+ "  \"num_negatives\": 10,\n",
+ "  \"output_hidden_size\": 16,\n",
+ "  \"pad_token_id\": 51,\n",
+ "  \"proj_codevector_dim\": 256,\n",
+ "  \"tdnn_dilation\": [\n",
+ "    1,\n",
+ "    2,\n",
+ "    3,\n",
+ "    1,\n",
+ "    1\n",
+ "  ],\n",
+ "  \"tdnn_dim\": [\n",
+ "    512,\n",
+ "    512,\n",
+ "    512,\n",
+ "    512,\n",
+ "    1500\n",
+ "  ],\n",
+ "  \"tdnn_kernel\": [\n",
+ "    5,\n",
+ "    3,\n",
+ "    3,\n",
+ "    1,\n",
+ "    1\n",
+ "  ],\n",
+ "  \"torch_dtype\": \"float32\",\n",
+ "  \"transformers_version\": \"4.17.0.dev0\",\n",
+ "  \"use_weighted_layer_sum\": false,\n",
+ "  \"vocab_size\": 54,\n",
+ "  \"xvector_output_dim\": 512\n",
+ "}\n",
+ "\n",
+ "loading feature extractor configuration file ./preprocessor_config.json\n",
+ "Feature extractor Wav2Vec2FeatureExtractor {\n",
+ "  \"do_normalize\": true,\n",
+ "  \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n",
+ "  \"feature_size\": 1,\n",
+ "  \"padding_side\": \"right\",\n",
+ "  \"padding_value\": 0.0,\n",
+ "  \"return_attention_mask\": false,\n",
+ "  \"sampling_rate\": 16000\n",
+ "}\n",
+ "\n",
+ "Didn't find file ./tokenizer.json. We won't load it.\n",
+ "loading file ./vocab.json\n",
+ "loading file ./tokenizer_config.json\n",
+ "loading file ./added_tokens.json\n",
+ "loading file ./special_tokens_map.json\n",
+ "loading file None\n",
+ "Adding <s> to the vocabulary\n",
+ "Adding </s> to the vocabulary\n",
+ "/workspace/xls-r-ab-test/./ is already a clone of https://huggingface.co/masapasa/xls-r-ab-test. Make sure you pull the latest changes with `repo.git_pull()`.\n",
+ "01/31/2022 17:18:19 - WARNING - huggingface_hub.repository - /workspace/xls-r-ab-test/./ is already a clone of https://huggingface.co/masapasa/xls-r-ab-test. Make sure you pull the latest changes with `repo.git_pull()`.\n",
+ "max_steps is given, it will override any value given in num_train_epochs\n",
+ "Using amp half precision backend\n",
+ "The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "/opt/conda/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ "  warnings.warn(\n",
+ "***** Running training *****\n",
+ "  Num examples = 30002\n",
+ "  Num Epochs = 1\n",
+ "  Instantaneous batch size per device = 2\n",
+ "  Total train batch size (w. parallel, distributed & accumulation) = 2\n",
+ "  Gradient Accumulation steps = 1\n",
+ "  Total optimization steps = 10\n",
+ " 50%|██████████████████████                      | 5/10 [00:00<00:00, 10.35it/s]Saving model checkpoint to ./checkpoint-5\n",
+ "Configuration saved in ./checkpoint-5/config.json\n",
+ "Model weights saved in ./checkpoint-5/pytorch_model.bin\n",
+ "Configuration saved in ./checkpoint-5/preprocessor_config.json\n",
+ "Configuration saved in ./preprocessor_config.json\n",
+ " 90%|███████████████████████████████████████▌    | 9/10 [00:03<00:00, 2.20it/s]Saving model checkpoint to ./checkpoint-10\n",
+ "Configuration saved in ./checkpoint-10/config.json\n",
+ "Model weights saved in ./checkpoint-10/pytorch_model.bin\n",
+ "Configuration saved in ./checkpoint-10/preprocessor_config.json\n",
+ "Deleting older checkpoint [checkpoint-5] due to args.save_total_limit\n",
+ "\n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n",
+ "{'train_runtime': 3.7102, 'train_samples_per_second': 5.391, 'train_steps_per_second': 2.695, 'train_loss': 67.9575927734375, 'epoch': 0.0}\n",
+ "100%|███████████████████████████████████████████| 10/10 [00:03<00:00, 2.70it/s]\n",
+ "Saving model checkpoint to ./\n",
+ "Configuration saved in ./config.json\n",
+ "Model weights saved in ./pytorch_model.bin\n",
+ "Configuration saved in ./preprocessor_config.json\n",
+ "Saving model checkpoint to ./\n",
+ "Configuration saved in ./config.json\n",
+ "Model weights saved in ./pytorch_model.bin\n",
+ "Configuration saved in ./preprocessor_config.json\n",
+ "Several commits (2) will be pushed upstream.\n",
+ "01/31/2022 17:18:26 - WARNING - huggingface_hub.repository - Several commits (2) will be pushed upstream.\n",
+ "The progress bars may be unreliable.\n",
+ "01/31/2022 17:18:26 - WARNING - huggingface_hub.repository - The progress bars may be unreliable.\n",
+ "Everything up-to-date\n",
+ "\n",
+ "01/31/2022 17:18:27 - WARNING - huggingface_hub.repository - Everything up-to-date\n",
+ "\n",
+ "Dropping the following result as it does not have all the necessary fields:\n",
+ "{'dataset': {'name': 'common_voice', 'type': 'common_voice', 'args': 'ab'}}\n",
+ "To https://huggingface.co/masapasa/xls-r-ab-test\n",
+ "   b50c32e..4e53539  main -> main\n",
+ "\n",
+ "01/31/2022 17:18:33 - WARNING - huggingface_hub.repository - To https://huggingface.co/masapasa/xls-r-ab-test\n",
+ "   b50c32e..4e53539  main -> main\n",
+ "\n",
+ "***** train metrics *****\n",
+ "  epoch = 0.0\n",
+ "  train_loss = 67.9576\n",
+ "  train_runtime = 0:00:03.71\n",
+ "  train_samples = 30002\n",
+ "  train_samples_per_second = 5.391\n",
+ "  train_steps_per_second = 2.695\n",
+ "01/31/2022 17:18:36 - INFO - __main__ - *** Evaluate ***\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ "  Num examples = 9184\n",
+ "  Batch size = 8\n",
+ " 68%|███████████████████████████             | 777/1148 [02:07<01:06, 5.55it/s]"
  ]
  }
  ],
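For reference, the checkpoint pushed in the log above can be loaded back for inference. A minimal sketch; the model id comes from the push URL in the log, while the sample file name and the torchaudio-based audio loading are assumptions, not part of the notebook:

```python
# Sketch: transcribe one audio file with the pushed checkpoint.
import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

model_id = "masapasa/xls-r-ab-test"  # from the push URL in the log
model = Wav2Vec2ForCTC.from_pretrained(model_id)
processor = Wav2Vec2Processor.from_pretrained(model_id)

waveform, sr = torchaudio.load("sample.wav")  # hypothetical input file
waveform = torchaudio.functional.resample(waveform, sr, 16_000)  # config expects 16 kHz

inputs = processor(waveform.squeeze().numpy(), sampling_rate=16_000, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
print(processor.batch_decode(torch.argmax(logits, dim=-1)))
```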
eval_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+     "epoch": 0.0,
+     "eval_loss": 140.0673828125,
+     "eval_runtime": 217.9185,
+     "eval_samples": 9184,
+     "eval_samples_per_second": 42.144,
+     "eval_steps_per_second": 5.268,
+     "eval_wer": 1.119321698229979
+ }
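Note that an eval_wer above 1.0 is not a logging bug: WER = (S + D + I) / N, so insertions can push it past 100%, which is unsurprising for this dummy-sized model after only 10 optimization steps. A sketch of the metric computation as the 4.17-era example scripts did it (the sentences below are made up for illustration):

```python
# Sketch: word error rate via the metric loader used by scripts of this era.
from datasets import load_metric

wer_metric = load_metric("wer")
wer = wer_metric.compute(
    predictions=["x y z"],  # hypothetical output: 2 substitutions + 1 insertion
    references=["a b"],     # hypothetical reference: N = 2 words
)
print(wer)  # 1.5 -- (S + D + I) / N = (2 + 0 + 1) / 2, so WER can exceed 1.0
```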
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 0.0,
+     "train_loss": 67.9575927734375,
+     "train_runtime": 3.7102,
+     "train_samples": 30002,
+     "train_samples_per_second": 5.391,
+     "train_steps_per_second": 2.695
+ }
trainer_state.json ADDED
@@ -0,0 +1,25 @@
+ {
+     "best_metric": null,
+     "best_model_checkpoint": null,
+     "epoch": 0.0006666222251849877,
+     "global_step": 10,
+     "is_hyper_param_search": false,
+     "is_local_process_zero": true,
+     "is_world_process_zero": true,
+     "log_history": [
+         {
+             "epoch": 0.0,
+             "step": 10,
+             "total_flos": 334514838528.0,
+             "train_loss": 67.9575927734375,
+             "train_runtime": 3.7102,
+             "train_samples_per_second": 5.391,
+             "train_steps_per_second": 2.695
+         }
+     ],
+     "max_steps": 10,
+     "num_train_epochs": 1,
+     "total_flos": 334514838528.0,
+     "trial_name": null,
+     "trial_params": null
+ }
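The fractional epoch recorded above is consistent with the run parameters in the notebook log (10 optimization steps, total train batch size 2, 30002 training samples); a quick cross-check:

```python
# Sketch: reproduce trainer_state.json's "epoch" from the logged run parameters.
global_step = 10       # "Total optimization steps = 10"
total_batch_size = 2   # per-device batch 2, single device, no gradient accumulation
train_samples = 30002  # "Num examples = 30002"

epoch = global_step * total_batch_size / train_samples
print(epoch)  # ~0.0006666222251849877, matching trainer_state.json
```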