End of training

- all_results.json +14 -0
- debugger_ovh_transformers.ipynb +195 -1
- eval_results.json +9 -0
- train_results.json +8 -0
- trainer_state.json +25 -0
all_results.json
ADDED
@@ -0,0 +1,14 @@
+{
+    "epoch": 0.0,
+    "eval_loss": 140.0673828125,
+    "eval_runtime": 217.9185,
+    "eval_samples": 9184,
+    "eval_samples_per_second": 42.144,
+    "eval_steps_per_second": 5.268,
+    "eval_wer": 1.119321698229979,
+    "train_loss": 67.9575927734375,
+    "train_runtime": 3.7102,
+    "train_samples": 30002,
+    "train_samples_per_second": 5.391,
+    "train_steps_per_second": 2.695
+}
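For context: all_results.json is the merged metrics file that `transformers.Trainer` maintains alongside the per-split files added below. A minimal sketch of the calls that produce these files, assuming the stock `run_speech_recognition_ctc.py` flow (the configured `trainer` object is not shown in this diff):

```python
# Sketch, assuming a configured Trainer from run_speech_recognition_ctc.py.
# save_metrics(split, ...) writes {split}_results.json and, by default
# (combined=True), also merges the keys into all_results.json.
train_result = trainer.train()
trainer.log_metrics("train", train_result.metrics)   # prints "***** train metrics *****"
trainer.save_metrics("train", train_result.metrics)  # train_results.json + all_results.json
trainer.save_state()                                 # trainer_state.json

eval_metrics = trainer.evaluate()
trainer.log_metrics("eval", eval_metrics)            # prints "***** eval metrics *****"
trainer.save_metrics("eval", eval_metrics)           # eval_results.json + all_results.json
```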
debugger_ovh_transformers.ipynb
CHANGED
@@ -426,7 +426,201 @@
     "\n",
     "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at hf-test/xls-r-dummy and are newly initialized: ['lm_head.bias', 'lm_head.weight']\n",
     "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
-    "preprocess datasets: 
+    "preprocess datasets: 30002ex [03:40, 136.35ex/s]\n",
+    "preprocess datasets: 9184ex [01:06, 137.56ex/s]\n",
+    "100%|██████████████████████████████████████████| 31/31 [00:00<00:00, 809.78ba/s]\n",
+    "100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 874.45ba/s]\n",
+    "Configuration saved in ./preprocessor_config.json\n",
+    "tokenizer config file saved in ./tokenizer_config.json\n",
+    "Special tokens file saved in ./special_tokens_map.json\n",
+    "added tokens file saved in ./added_tokens.json\n",
+    "Configuration saved in ./config.json\n",
+    "loading feature extractor configuration file ./preprocessor_config.json\n",
+    "loading configuration file ./config.json\n",
+    "Model config Wav2Vec2Config {\n",
+    " \"_name_or_path\": \"./\",\n",
+    " \"activation_dropout\": 0.0,\n",
+    " \"adapter_kernel_size\": 3,\n",
+    " \"adapter_stride\": 2,\n",
+    " \"add_adapter\": false,\n",
+    " \"apply_spec_augment\": true,\n",
+    " \"architectures\": [\n",
+    " \"Wav2Vec2Model\"\n",
+    " ],\n",
+    " \"attention_dropout\": 0.0,\n",
+    " \"bos_token_id\": 1,\n",
+    " \"classifier_proj_size\": 256,\n",
+    " \"codevector_dim\": 256,\n",
+    " \"contrastive_logits_temperature\": 0.1,\n",
+    " \"conv_bias\": false,\n",
+    " \"conv_dim\": [\n",
+    " 32,\n",
+    " 32,\n",
+    " 32\n",
+    " ],\n",
+    " \"conv_kernel\": [\n",
+    " 8,\n",
+    " 8,\n",
+    " 8\n",
+    " ],\n",
+    " \"conv_stride\": [\n",
+    " 4,\n",
+    " 4,\n",
+    " 4\n",
+    " ],\n",
+    " \"ctc_loss_reduction\": \"mean\",\n",
+    " \"ctc_zero_infinity\": false,\n",
+    " \"diversity_loss_weight\": 0.1,\n",
+    " \"do_stable_layer_norm\": true,\n",
+    " \"eos_token_id\": 2,\n",
+    " \"feat_extract_activation\": \"gelu\",\n",
+    " \"feat_extract_dropout\": 0.0,\n",
+    " \"feat_extract_norm\": \"layer\",\n",
+    " \"feat_proj_dropout\": 0.0,\n",
+    " \"feat_quantizer_dropout\": 0.0,\n",
+    " \"final_dropout\": 0.0,\n",
+    " \"hidden_act\": \"gelu\",\n",
+    " \"hidden_dropout\": 0.0,\n",
+    " \"hidden_dropout_prob\": 0.1,\n",
+    " \"hidden_size\": 16,\n",
+    " \"initializer_range\": 0.02,\n",
+    " \"intermediate_size\": 20,\n",
+    " \"layer_norm_eps\": 1e-05,\n",
+    " \"layerdrop\": 0.0,\n",
+    " \"mask_feature_length\": 10,\n",
+    " \"mask_feature_min_masks\": 0,\n",
+    " \"mask_feature_prob\": 0.0,\n",
+    " \"mask_time_length\": 10,\n",
+    " \"mask_time_min_masks\": 2,\n",
+    " \"mask_time_prob\": 0.05,\n",
+    " \"model_type\": \"wav2vec2\",\n",
+    " \"num_adapter_layers\": 3,\n",
+    " \"num_attention_heads\": 2,\n",
+    " \"num_codevector_groups\": 2,\n",
+    " \"num_codevectors_per_group\": 320,\n",
+    " \"num_conv_pos_embedding_groups\": 2,\n",
+    " \"num_conv_pos_embeddings\": 16,\n",
+    " \"num_feat_extract_layers\": 3,\n",
+    " \"num_hidden_layers\": 4,\n",
+    " \"num_negatives\": 10,\n",
+    " \"output_hidden_size\": 16,\n",
+    " \"pad_token_id\": 51,\n",
+    " \"proj_codevector_dim\": 256,\n",
+    " \"tdnn_dilation\": [\n",
+    " 1,\n",
+    " 2,\n",
+    " 3,\n",
+    " 1,\n",
+    " 1\n",
+    " ],\n",
+    " \"tdnn_dim\": [\n",
+    " 512,\n",
+    " 512,\n",
+    " 512,\n",
+    " 512,\n",
+    " 1500\n",
+    " ],\n",
+    " \"tdnn_kernel\": [\n",
+    " 5,\n",
+    " 3,\n",
+    " 3,\n",
+    " 1,\n",
+    " 1\n",
+    " ],\n",
+    " \"torch_dtype\": \"float32\",\n",
+    " \"transformers_version\": \"4.17.0.dev0\",\n",
+    " \"use_weighted_layer_sum\": false,\n",
+    " \"vocab_size\": 54,\n",
+    " \"xvector_output_dim\": 512\n",
+    "}\n",
+    "\n",
+    "loading feature extractor configuration file ./preprocessor_config.json\n",
+    "Feature extractor Wav2Vec2FeatureExtractor {\n",
+    " \"do_normalize\": true,\n",
+    " \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n",
+    " \"feature_size\": 1,\n",
+    " \"padding_side\": \"right\",\n",
+    " \"padding_value\": 0.0,\n",
+    " \"return_attention_mask\": false,\n",
+    " \"sampling_rate\": 16000\n",
+    "}\n",
+    "\n",
+    "Didn't find file ./tokenizer.json. We won't load it.\n",
+    "loading file ./vocab.json\n",
+    "loading file ./tokenizer_config.json\n",
+    "loading file ./added_tokens.json\n",
+    "loading file ./special_tokens_map.json\n",
+    "loading file None\n",
+    "Adding <s> to the vocabulary\n",
+    "Adding </s> to the vocabulary\n",
+    "/workspace/xls-r-ab-test/./ is already a clone of https://huggingface.co/masapasa/xls-r-ab-test. Make sure you pull the latest changes with `repo.git_pull()`.\n",
+    "01/31/2022 17:18:19 - WARNING - huggingface_hub.repository - /workspace/xls-r-ab-test/./ is already a clone of https://huggingface.co/masapasa/xls-r-ab-test. Make sure you pull the latest changes with `repo.git_pull()`.\n",
+    "max_steps is given, it will override any value given in num_train_epochs\n",
+    "Using amp half precision backend\n",
+    "The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+    "/opt/conda/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+    " warnings.warn(\n",
+    "***** Running training *****\n",
+    " Num examples = 30002\n",
+    " Num Epochs = 1\n",
+    " Instantaneous batch size per device = 2\n",
+    " Total train batch size (w. parallel, distributed & accumulation) = 2\n",
+    " Gradient Accumulation steps = 1\n",
+    " Total optimization steps = 10\n",
+    " 50%|█████████████████████                     | 5/10 [00:00<00:00, 10.35it/s]Saving model checkpoint to ./checkpoint-5\n",
+    "Configuration saved in ./checkpoint-5/config.json\n",
+    "Model weights saved in ./checkpoint-5/pytorch_model.bin\n",
+    "Configuration saved in ./checkpoint-5/preprocessor_config.json\n",
+    "Configuration saved in ./preprocessor_config.json\n",
+    " 90%|████████████████████████████████████████  | 9/10 [00:03<00:00, 2.20it/s]Saving model checkpoint to ./checkpoint-10\n",
+    "Configuration saved in ./checkpoint-10/config.json\n",
+    "Model weights saved in ./checkpoint-10/pytorch_model.bin\n",
+    "Configuration saved in ./checkpoint-10/preprocessor_config.json\n",
+    "Deleting older checkpoint [checkpoint-5] due to args.save_total_limit\n",
+    "\n",
+    "\n",
+    "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+    "\n",
+    "\n",
+    "{'train_runtime': 3.7102, 'train_samples_per_second': 5.391, 'train_steps_per_second': 2.695, 'train_loss': 67.9575927734375, 'epoch': 0.0}\n",
+    "100%|███████████████████████████████████████████| 10/10 [00:03<00:00, 2.70it/s]\n",
+    "Saving model checkpoint to ./\n",
+    "Configuration saved in ./config.json\n",
+    "Model weights saved in ./pytorch_model.bin\n",
+    "Configuration saved in ./preprocessor_config.json\n",
+    "Saving model checkpoint to ./\n",
+    "Configuration saved in ./config.json\n",
+    "Model weights saved in ./pytorch_model.bin\n",
+    "Configuration saved in ./preprocessor_config.json\n",
+    "Several commits (2) will be pushed upstream.\n",
+    "01/31/2022 17:18:26 - WARNING - huggingface_hub.repository - Several commits (2) will be pushed upstream.\n",
+    "The progress bars may be unreliable.\n",
+    "01/31/2022 17:18:26 - WARNING - huggingface_hub.repository - The progress bars may be unreliable.\n",
+    "Everything up-to-date\n",
+    "\n",
+    "01/31/2022 17:18:27 - WARNING - huggingface_hub.repository - Everything up-to-date\n",
+    "\n",
+    "Dropping the following result as it does not have all the necessary fields:\n",
+    "{'dataset': {'name': 'common_voice', 'type': 'common_voice', 'args': 'ab'}}\n",
+    "To https://huggingface.co/masapasa/xls-r-ab-test\n",
+    "   b50c32e..4e53539 main -> main\n",
+    "\n",
+    "01/31/2022 17:18:33 - WARNING - huggingface_hub.repository - To https://huggingface.co/masapasa/xls-r-ab-test\n",
+    "   b50c32e..4e53539 main -> main\n",
+    "\n",
+    "***** train metrics *****\n",
+    " epoch = 0.0\n",
+    " train_loss = 67.9576\n",
+    " train_runtime = 0:00:03.71\n",
+    " train_samples = 30002\n",
+    " train_samples_per_second = 5.391\n",
+    " train_steps_per_second = 2.695\n",
+    "01/31/2022 17:18:36 - INFO - __main__ - *** Evaluate ***\n",
+    "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+    "***** Running Evaluation *****\n",
+    " Num examples = 9184\n",
+    " Batch size = 8\n",
+    " 68%|███████████████████████████               | 777/1148 [02:07<01:06, 5.55it/s]"
     ]
    }
   ],
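The run parameters can be read off the log above. As a hedged reconstruction (every value below is inferred from the output, not copied from the actual notebook cell), the TrainingArguments consistent with this run would look roughly like:

```python
from transformers import TrainingArguments

# Hypothetical reconstruction -- values inferred from the log above.
args = TrainingArguments(
    output_dir="./",                # checkpoints land in ./checkpoint-5, ./checkpoint-10
    per_device_train_batch_size=2,  # "Instantaneous batch size per device = 2"
    per_device_eval_batch_size=8,   # "Batch size = 8" during evaluation
    max_steps=10,                   # "Total optimization steps = 10"; overrides num_train_epochs
    save_steps=5,                   # checkpoints appear at steps 5 and 10
    save_total_limit=1,             # "Deleting older checkpoint [checkpoint-5]"
    fp16=True,                      # "Using amp half precision backend"
    push_to_hub=True,               # commits pushed to masapasa/xls-r-ab-test
)
```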
eval_results.json
ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 0.0,
+    "eval_loss": 140.0673828125,
+    "eval_runtime": 217.9185,
+    "eval_samples": 9184,
+    "eval_samples_per_second": 42.144,
+    "eval_steps_per_second": 5.268,
+    "eval_wer": 1.119321698229979
+}
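Note that eval_wer is about 1.12, i.e. above 100%. That is not a bug: WER = (substitutions + deletions + insertions) / reference words, so a model that inserts spurious tokens, as this freshly initialized lm_head does, can exceed 1.0. A quick sanity check with the jiwer package (hypothetical strings, assuming jiwer is installed):

```python
import jiwer

# An over-generating hypothesis yields more errors than reference words:
# 2 substitutions + 4 insertions over a 2-word reference -> WER = 3.0.
reference = "hello world"
hypothesis = "a b c d e f"
print(jiwer.wer(reference, hypothesis))  # 3.0
```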
train_results.json
ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 0.0,
+    "train_loss": 67.9575927734375,
+    "train_runtime": 3.7102,
+    "train_samples": 30002,
+    "train_samples_per_second": 5.391,
+    "train_steps_per_second": 2.695
+}
trainer_state.json
ADDED
@@ -0,0 +1,25 @@
+{
+    "best_metric": null,
+    "best_model_checkpoint": null,
+    "epoch": 0.0006666222251849877,
+    "global_step": 10,
+    "is_hyper_param_search": false,
+    "is_local_process_zero": true,
+    "is_world_process_zero": true,
+    "log_history": [
+        {
+            "epoch": 0.0,
+            "step": 10,
+            "total_flos": 334514838528.0,
+            "train_loss": 67.9575927734375,
+            "train_runtime": 3.7102,
+            "train_samples_per_second": 5.391,
+            "train_steps_per_second": 2.695
+        }
+    ],
+    "max_steps": 10,
+    "num_train_epochs": 1,
+    "total_flos": 334514838528.0,
+    "trial_name": null,
+    "trial_params": null
+}
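The fractional epoch value checks out against the training log: 30,002 examples at an effective batch size of 2 give 15,001 optimizer steps per epoch, and the run stopped at global_step 10. A one-line verification of that arithmetic:

```python
# 10 of 15001 steps per epoch reproduces the stored epoch fraction.
steps_per_epoch = 30002 // 2   # effective batch size 2, no gradient accumulation
print(10 / steps_per_epoch)    # ~0.0006666222251849877, the "epoch" field above
```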