HarrisDePerceptron committed on
Commit
7d289a7
1 Parent(s): 5048c77

Training in progress, step 500

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 59, "</s>": 60}
config.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-xls-r-300m",
3
+ "activation_dropout": 0.1,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 768,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.0,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "hidden_act": "gelu",
56
+ "hidden_dropout": 0.0,
57
+ "hidden_size": 1024,
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 4096,
60
+ "layer_norm_eps": 1e-05,
61
+ "layerdrop": 0.0,
62
+ "mask_feature_length": 64,
63
+ "mask_feature_min_masks": 0,
64
+ "mask_feature_prob": 0.25,
65
+ "mask_time_length": 10,
66
+ "mask_time_min_masks": 2,
67
+ "mask_time_prob": 0.75,
68
+ "model_type": "wav2vec2",
69
+ "num_adapter_layers": 3,
70
+ "num_attention_heads": 16,
71
+ "num_codevector_groups": 2,
72
+ "num_codevectors_per_group": 320,
73
+ "num_conv_pos_embedding_groups": 16,
74
+ "num_conv_pos_embeddings": 128,
75
+ "num_feat_extract_layers": 7,
76
+ "num_hidden_layers": 24,
77
+ "num_negatives": 100,
78
+ "output_hidden_size": 1024,
79
+ "pad_token_id": 58,
80
+ "proj_codevector_dim": 768,
81
+ "tdnn_dilation": [
82
+ 1,
83
+ 2,
84
+ 3,
85
+ 1,
86
+ 1
87
+ ],
88
+ "tdnn_dim": [
89
+ 512,
90
+ 512,
91
+ 512,
92
+ 512,
93
+ 1500
94
+ ],
95
+ "tdnn_kernel": [
96
+ 5,
97
+ 3,
98
+ 3,
99
+ 1,
100
+ 1
101
+ ],
102
+ "torch_dtype": "float32",
103
+ "transformers_version": "4.17.0.dev0",
104
+ "use_weighted_layer_sum": false,
105
+ "vocab_size": 61,
106
+ "xvector_output_dim": 512
107
+ }
eval.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import re
4
+ from typing import Dict
5
+
6
+ import torch
7
+ from datasets import Audio, Dataset, load_dataset, load_metric
8
+
9
+ from transformers import AutoFeatureExtractor, pipeline
10
+
11
+
12
def log_results(result: Dataset, args: argparse.Namespace):
    """Compute WER/CER for the predictions and write them to disk.

    The original template marks this function "DO NOT CHANGE"; the metric
    computation below is untouched. Only the output-logging guard and the
    file encoding are corrected.

    Args:
        result: Dataset with a ``"prediction"`` and a ``"target"`` column.
        args: Parsed command-line arguments. Reads ``log_outputs``,
            ``dataset``, ``config`` and ``split``.

    Side effects:
        Prints the metrics, writes ``<dataset_id>_eval_results.txt`` and,
        when ``args.log_outputs`` is truthy, also writes
        ``log_<dataset_id>_predictions.txt`` and
        ``log_<dataset_id>_targets.txt`` in the working directory.
    """

    log_outputs = args.log_outputs
    # "/" in the dataset name would create sub-directories, so flatten it.
    dataset_id = "_".join(args.dataset.split("/") + [args.config, args.split])

    # load metric
    wer = load_metric("wer")
    cer = load_metric("cer")

    # compute metrics
    wer_result = wer.compute(references=result["target"], predictions=result["prediction"])
    cer_result = cer.compute(references=result["target"], predictions=result["prediction"])

    # print & log results
    result_str = f"WER: {wer_result}\n" f"CER: {cer_result}"
    print(result_str)

    with open(f"{dataset_id}_eval_results.txt", "w", encoding="utf-8") as f:
        f.write(result_str)

    # log all results in text file. Possibly interesting for analysis
    # `--log_outputs` is a store_true flag (False when absent, never None),
    # so test truthiness; `is not None` made this branch unconditional.
    if log_outputs:
        pred_file = f"log_{dataset_id}_predictions.txt"
        target_file = f"log_{dataset_id}_targets.txt"

        # Explicit UTF-8: transcripts may contain non-ASCII characters.
        with open(pred_file, "w", encoding="utf-8") as p, open(target_file, "w", encoding="utf-8") as t:

            # mapping function to write output: index line, then the text
            def write_to_file(batch, i):
                p.write(f"{i}" + "\n")
                p.write(batch["prediction"] + "\n")
                t.write(f"{i}" + "\n")
                t.write(batch["target"] + "\n")

            result.map(write_to_file, with_indices=True)
48
+
49
+
50
# Punctuation stripped from the reference text.
# IMPORTANT: this should correspond to the chars that were ignored during training.
# Raw string so that `\-`, `\;` and `\:` reach the regex engine without
# relying on invalid string-literal escapes.
_CHARS_TO_IGNORE = re.compile(r'[,?.!\-\;\:"“%‘”�—’…–]')

# A single newline, or any run of two or more spaces/newlines.
_WHITESPACE_TO_COLLAPSE = re.compile(r"[ \n]{2,}|\n")


def normalize_text(text: str) -> str:
    """DO ADAPT FOR YOUR USE CASE. Normalize the target (reference) text.

    The text is lower-cased, the punctuation in ``_CHARS_TO_IGNORE`` is
    removed, and every newline or run of spaces/newlines is replaced by a
    single space. Leading/trailing single spaces and other whitespace
    (e.g. tabs) are left as they are.

    Args:
        text: Raw reference transcript.

    Returns:
        The normalized transcript.
    """
    text = _CHARS_TO_IGNORE.sub("", text.lower())

    # One regex pass instead of successive split/join passes, which could
    # leave double spaces behind (e.g. after three consecutive newlines).
    return _WHITESPACE_TO_COLLAPSE.sub(" ", text)
65
+
66
+
67
def main(args):
    """Transcribe a dataset split with an ASR pipeline and log the metrics.

    Args:
        args: Parsed command-line arguments. Reads ``dataset``, ``config``,
            ``split``, ``model_id``, ``device``, ``chunk_length_s`` and
            ``stride_length_s``. ``args.device`` is filled in when it is
            ``None``.
    """
    eval_set = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)

    # For a quick smoke test, uncomment to evaluate only a few examples:
    # eval_set = eval_set.select(range(10))

    # Resample the audio column to the rate the model's feature extractor expects.
    target_rate = AutoFeatureExtractor.from_pretrained(args.model_id).sampling_rate
    eval_set = eval_set.cast_column("audio", Audio(sampling_rate=target_rate))

    # No explicit device: use the first GPU when CUDA is available, else CPU (-1).
    if args.device is None:
        if torch.cuda.is_available():
            args.device = 0
        else:
            args.device = -1
    recognizer = pipeline("automatic-speech-recognition", model=args.model_id, device=args.device)

    def transcribe(example):
        """Attach the model transcript and the normalized reference to one example."""
        output = recognizer(
            example["audio"]["array"],
            chunk_length_s=args.chunk_length_s,
            stride_length_s=args.stride_length_s,
        )
        example["prediction"] = output["text"]
        example["target"] = normalize_text(example["sentence"])
        return example

    # Run inference on every example, dropping the original columns so only
    # the columns added by `transcribe` remain.
    predictions = eval_set.map(transcribe, remove_columns=eval_set.column_names)

    # Compute and log WER/CER.
    log_results(predictions, args)
102
+
103
+
104
if __name__ == "__main__":
    # Command-line entry point: parse the evaluation options and run `main`.
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--model_id", type=str, required=True, help="Model identifier. Should be loadable with 🤗 Transformers"
    )
    parser.add_argument(
        "--dataset",
        type=str,
        required=True,
        help="Dataset name to evaluate the `model_id`. Should be loadable with 🤗 Datasets",
    )
    parser.add_argument(
        "--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'` for Common Voice"
    )
    parser.add_argument("--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`")
    # The help texts below used to advertise defaults (5 s, 1 s, CPU) that the
    # code never applied; they now describe the actual defaults.
    parser.add_argument(
        "--chunk_length_s",
        type=float,
        default=None,
        help="Chunk length in seconds. Not set by default (None is passed to the pipeline).",
    )
    parser.add_argument(
        "--stride_length_s",
        type=float,
        default=None,
        help="Stride of the audio chunks in seconds. Not set by default (None is passed to the pipeline).",
    )
    parser.add_argument(
        "--log_outputs", action="store_true", help="If defined, write outputs to log file for analysis."
    )
    parser.add_argument(
        "--device",
        type=int,
        default=None,
        help=(
            "The device to run the pipeline on. -1 for CPU, 0 for the first GPU and so on. "
            "Defaults to the first GPU if CUDA is available, otherwise CPU."
        ),
    )
    args = parser.parse_args()

    main(args)
nohup.out ADDED
@@ -0,0 +1,550 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/1 [00:00<?, ?ba/s]
 
1
  0%| | 0/1 [00:00<?, ?ba/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  0%| | 0/1 [00:00<?, ?ba/s]
 
3
  0%| | 0/1 [00:00<?, ?ba/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  0%| | 0/1250 [00:00<?, ?it/s]
5
  0%| | 1/1250 [00:02<53:39, 2.58s/it]
6
  0%| | 2/1250 [00:04<43:18, 2.08s/it]
7
  0%| | 3/1250 [00:05<37:42, 1.81s/it]
8
  0%| | 4/1250 [00:07<34:12, 1.65s/it]
9
  0%| | 5/1250 [00:08<31:07, 1.50s/it]
10
  0%| | 6/1250 [00:09<27:31, 1.33s/it]
11
  1%| | 7/1250 [00:11<35:11, 1.70s/it]
12
  1%| | 8/1250 [00:13<35:39, 1.72s/it]
13
  1%| | 9/1250 [00:15<34:52, 1.69s/it]
14
  1%| | 10/1250 [00:16<33:13, 1.61s/it]
15
  1%| | 11/1250 [00:17<30:26, 1.47s/it]
16
  1%| | 12/1250 [00:18<27:37, 1.34s/it]
17
  1%| | 13/1250 [00:21<34:42, 1.68s/it]
18
  1%| | 14/1250 [00:23<34:49, 1.69s/it]
19
  1%| | 15/1250 [00:24<34:04, 1.66s/it]
20
  1%|▏ | 16/1250 [00:26<32:22, 1.57s/it]
21
  1%|▏ | 17/1250 [00:27<29:57, 1.46s/it]
22
  1%|▏ | 18/1250 [00:28<26:57, 1.31s/it]
23
  2%|▏ | 19/1250 [00:30<32:14, 1.57s/it]
24
  2%|▏ | 20/1250 [00:32<32:52, 1.60s/it]
25
  2%|▏ | 21/1250 [00:33<32:10, 1.57s/it]
26
  2%|▏ | 22/1250 [00:34<30:44, 1.50s/it]
27
  2%|▏ | 23/1250 [00:36<28:38, 1.40s/it]
28
  2%|▏ | 24/1250 [00:37<26:12, 1.28s/it]
29
  2%|▏ | 25/1250 [00:38<28:56, 1.42s/it]
30
  2%|▏ | 26/1250 [00:41<38:28, 1.89s/it]
31
  2%|▏ | 27/1250 [00:43<37:43, 1.85s/it]
32
  2%|▏ | 28/1250 [00:45<35:59, 1.77s/it]
33
  2%|▏ | 29/1250 [00:46<33:21, 1.64s/it]
34
  2%|▏ | 30/1250 [00:47<30:47, 1.51s/it]
35
  2%|▏ | 31/1250 [00:48<27:31, 1.36s/it]
36
  3%|▎ | 32/1250 [00:51<33:43, 1.66s/it]
37
  3%|▎ | 33/1250 [00:52<34:25, 1.70s/it]
38
  3%|▎ | 34/1250 [00:54<33:34, 1.66s/it]
39
  3%|▎ | 35/1250 [00:55<31:55, 1.58s/it]
40
  3%|▎ | 36/1250 [00:56<29:40, 1.47s/it]
41
  3%|▎ | 37/1250 [00:58<26:54, 1.33s/it]
42
  3%|▎ | 38/1250 [01:00<33:02, 1.64s/it]
43
  3%|▎ | 39/1250 [01:02<33:43, 1.67s/it]
44
  3%|▎ | 40/1250 [01:03<32:32, 1.61s/it]
45
  3%|▎ | 41/1250 [01:04<30:50, 1.53s/it]
46
  3%|▎ | 42/1250 [01:06<28:27, 1.41s/it]
47
  3%|▎ | 43/1250 [01:07<25:45, 1.28s/it]
48
  4%|▎ | 44/1250 [01:09<31:31, 1.57s/it]
49
  4%|▎ | 45/1250 [01:10<32:20, 1.61s/it]
50
  4%|▎ | 46/1250 [01:12<31:58, 1.59s/it]
51
  4%|▍ | 47/1250 [01:13<30:42, 1.53s/it]
52
  4%|▍ | 48/1250 [01:15<28:46, 1.44s/it]
53
  4%|▍ | 49/1250 [01:16<26:22, 1.32s/it]
54
  4%|▍ | 50/1250 [01:17<29:12, 1.46s/it]
55
  4%|▍ | 51/1250 [01:20<37:20, 1.87s/it]
56
  4%|▍ | 52/1250 [01:22<36:38, 1.83s/it]
57
  4%|▍ | 53/1250 [01:24<35:02, 1.76s/it]
58
  4%|▍ | 54/1250 [01:25<32:55, 1.65s/it]
59
  4%|▍ | 55/1250 [01:26<30:10, 1.51s/it]
60
  4%|▍ | 56/1250 [01:27<26:59, 1.36s/it]
61
  5%|▍ | 57/1250 [01:29<32:08, 1.62s/it]
62
  5%|▍ | 58/1250 [01:31<32:44, 1.65s/it]
63
  5%|▍ | 59/1250 [01:33<31:55, 1.61s/it]
64
  5%|▍ | 60/1250 [01:34<30:25, 1.53s/it]
65
  5%|▍ | 61/1250 [01:35<28:26, 1.44s/it]
66
  5%|▍ | 62/1250 [01:36<26:14, 1.33s/it]
67
  5%|▌ | 63/1250 [01:39<32:19, 1.63s/it]
68
  5%|▌ | 64/1250 [01:40<33:12, 1.68s/it]
69
  5%|▌ | 65/1250 [01:42<32:10, 1.63s/it]
70
  5%|▌ | 66/1250 [01:43<30:43, 1.56s/it]
71
  5%|▌ | 67/1250 [01:45<28:35, 1.45s/it]
72
  5%|▌ | 68/1250 [01:46<25:57, 1.32s/it]
73
  6%|▌ | 69/1250 [01:48<33:24, 1.70s/it]
74
  6%|▌ | 70/1250 [01:50<34:10, 1.74s/it]
75
  6%|▌ | 71/1250 [01:51<32:41, 1.66s/it]
76
  6%|▌ | 72/1250 [01:53<30:43, 1.57s/it]
77
  6%|▌ | 73/1250 [01:54<28:32, 1.46s/it]
78
  6%|▌ | 74/1250 [01:55<25:40, 1.31s/it]
79
  6%|▌ | 75/1250 [01:57<28:23, 1.45s/it]
80
  6%|▌ | 76/1250 [02:00<37:02, 1.89s/it]
81
  6%|▌ | 77/1250 [02:01<36:23, 1.86s/it]
82
  6%|▌ | 78/1250 [02:03<34:41, 1.78s/it]
83
  6%|▋ | 79/1250 [02:04<32:21, 1.66s/it]
84
  6%|▋ | 80/1250 [02:06<29:42, 1.52s/it]
85
  6%|▋ | 81/1250 [02:07<26:30, 1.36s/it]
86
  7%|▋ | 82/1250 [02:09<32:23, 1.66s/it]
87
  7%|▋ | 83/1250 [02:11<32:49, 1.69s/it]
88
  7%|▋ | 84/1250 [02:12<31:56, 1.64s/it]
89
  7%|▋ | 85/1250 [02:14<30:09, 1.55s/it]
90
  7%|▋ | 86/1250 [02:15<27:51, 1.44s/it]
91
  7%|▋ | 87/1250 [02:16<25:16, 1.30s/it]
92
  7%|▋ | 88/1250 [02:18<32:07, 1.66s/it]
93
  7%|▋ | 89/1250 [02:20<32:48, 1.70s/it]
94
  7%|▋ | 90/1250 [02:22<31:43, 1.64s/it]
95
  7%|▋ | 91/1250 [02:23<30:21, 1.57s/it]
96
  7%|▋ | 92/1250 [02:24<28:26, 1.47s/it]
97
  7%|▋ | 93/1250 [02:25<26:29, 1.37s/it]
98
  8%|▊ | 94/1250 [02:28<31:23, 1.63s/it]
99
  8%|▊ | 95/1250 [02:29<31:51, 1.66s/it]
100
  8%|▊ | 96/1250 [02:31<30:50, 1.60s/it]
101
  8%|▊ | 97/1250 [02:32<29:30, 1.54s/it]
102
  8%|▊ | 98/1250 [02:33<27:25, 1.43s/it]
103
  8%|▊ | 99/1250 [02:34<24:44, 1.29s/it]
104
  8%|▊ | 100/1250 [02:36<26:59, 1.41s/it]
105
 
106
  8%|▊ | 100/1250 [02:36<26:59, 1.41s/it]
107
  8%|▊ | 101/1250 [02:39<35:08, 1.84s/it]
108
  8%|▊ | 102/1250 [02:41<34:28, 1.80s/it]
109
  8%|▊ | 103/1250 [02:42<32:30, 1.70s/it]
110
  8%|▊ | 104/1250 [02:43<30:05, 1.58s/it]
111
  8%|▊ | 105/1250 [02:44<27:37, 1.45s/it]
112
  8%|▊ | 106/1250 [02:45<24:44, 1.30s/it]
113
  9%|▊ | 107/1250 [02:48<30:38, 1.61s/it]
114
  9%|▊ | 108/1250 [02:49<31:24, 1.65s/it]
115
  9%|▊ | 109/1250 [02:51<30:45, 1.62s/it]
116
  9%|▉ | 110/1250 [02:52<29:22, 1.55s/it]
117
  9%|▉ | 111/1250 [02:54<27:20, 1.44s/it]
118
  9%|▉ | 112/1250 [02:55<24:41, 1.30s/it]
119
  9%|▉ | 113/1250 [02:57<29:34, 1.56s/it]
120
  9%|▉ | 114/1250 [02:58<30:11, 1.59s/it]
121
  9%|▉ | 115/1250 [03:00<29:50, 1.58s/it]
122
  9%|▉ | 116/1250 [03:01<28:49, 1.53s/it]
123
  9%|▉ | 117/1250 [03:03<26:52, 1.42s/it]
124
  9%|▉ | 118/1250 [03:04<24:25, 1.29s/it]
125
  10%|▉ | 119/1250 [03:06<30:27, 1.62s/it]
126
  10%|▉ | 120/1250 [03:08<31:40, 1.68s/it]
127
  10%|▉ | 121/1250 [03:09<30:52, 1.64s/it]
128
  10%|▉ | 122/1250 [03:11<29:20, 1.56s/it]
129
  10%|▉ | 123/1250 [03:12<27:11, 1.45s/it]
130
  10%|▉ | 124/1250 [03:13<24:31, 1.31s/it]
131
  10%|█ | 125/1250 [03:15<28:39, 1.53s/it]
132
  10%|█ | 126/1250 [03:18<36:45, 1.96s/it]
133
  10%|█ | 127/1250 [03:20<35:48, 1.91s/it]
134
  10%|█ | 128/1250 [03:21<33:48, 1.81s/it]
135
  10%|█ | 129/1250 [03:23<31:32, 1.69s/it]
136
  10%|█ | 130/1250 [03:24<28:58, 1.55s/it]
137
  10%|█ | 131/1250 [03:25<25:53, 1.39s/it]
138
  11%|█ | 132/1250 [03:27<31:02, 1.67s/it]
139
  11%|█ | 133/1250 [03:29<31:42, 1.70s/it]
140
  11%|█ | 134/1250 [03:30<30:44, 1.65s/it]
141
  11%|█ | 135/1250 [03:32<29:06, 1.57s/it]
142
  11%|█ | 136/1250 [03:33<26:59, 1.45s/it]
143
  11%|█ | 137/1250 [03:34<24:33, 1.32s/it]
144
  11%|█ | 138/1250 [03:36<30:43, 1.66s/it]
145
  11%|█ | 139/1250 [03:38<31:10, 1.68s/it]
146
  11%|█ | 140/1250 [03:40<30:34, 1.65s/it]
147
  11%|█▏ | 141/1250 [03:41<29:08, 1.58s/it]
148
  11%|█▏ | 142/1250 [03:42<26:41, 1.45s/it]
149
  11%|█▏ | 143/1250 [03:43<24:14, 1.31s/it]
150
  12%|█▏ | 144/1250 [03:46<29:19, 1.59s/it]
151
  12%|█▏ | 145/1250 [03:47<29:55, 1.62s/it]
152
  12%|█▏ | 146/1250 [03:49<29:20, 1.59s/it]
153
  12%|█▏ | 147/1250 [03:50<28:09, 1.53s/it]
154
  12%|█▏ | 148/1250 [03:51<26:42, 1.45s/it]
155
  12%|█▏ | 149/1250 [03:52<24:23, 1.33s/it]
156
  12%|█▏ | 150/1250 [03:54<27:24, 1.49s/it]
157
  12%|█▏ | 151/1250 [03:57<34:21, 1.88s/it]
158
  12%|█▏ | 152/1250 [03:59<33:23, 1.82s/it]
159
  12%|█▏ | 153/1250 [04:00<31:32, 1.72s/it]
160
  12%|█▏ | 154/1250 [04:02<29:24, 1.61s/it]
161
  12%|█▏ | 155/1250 [04:03<27:13, 1.49s/it]
162
  12%|█▏ | 156/1250 [04:04<24:24, 1.34s/it]
163
  13%|█▎ | 157/1250 [04:06<30:38, 1.68s/it]
164
  13%|█▎ | 158/1250 [04:08<30:45, 1.69s/it]
165
  13%|█▎ | 159/1250 [04:10<30:02, 1.65s/it]
166
  13%|█▎ | 160/1250 [04:11<28:40, 1.58s/it]
167
  13%|█▎ | 161/1250 [04:12<26:29, 1.46s/it]
168
  13%|█▎ | 162/1250 [04:13<23:57, 1.32s/it]
169
  13%|█▎ | 163/1250 [04:16<29:54, 1.65s/it]
170
  13%|█▎ | 164/1250 [04:17<30:57, 1.71s/it]
171
  13%|█▎ | 165/1250 [04:19<29:58, 1.66s/it]
172
  13%|█▎ | 166/1250 [04:20<28:18, 1.57s/it]
173
  13%|█▎ | 167/1250 [04:22<26:21, 1.46s/it]
174
  13%|█▎ | 168/1250 [04:23<23:44, 1.32s/it]
175
  14%|█▎ | 169/1250 [04:25<29:27, 1.63s/it]
176
  14%|█▎ | 170/1250 [04:27<30:05, 1.67s/it]
177
  14%|█▎ | 171/1250 [04:28<29:08, 1.62s/it]
178
  14%|█▍ | 172/1250 [04:30<27:20, 1.52s/it]
179
  14%|█▍ | 173/1250 [04:31<25:27, 1.42s/it]
180
  14%|█▍ | 174/1250 [04:32<22:57, 1.28s/it]
181
  14%|█▍ | 175/1250 [04:33<25:14, 1.41s/it]
182
  14%|█▍ | 176/1250 [04:36<32:49, 1.83s/it]
183
  14%|█▍ | 177/1250 [04:38<32:19, 1.81s/it]
184
  14%|█▍ | 178/1250 [04:39<30:54, 1.73s/it]
185
  14%|█▍ | 179/1250 [04:41<29:09, 1.63s/it]
186
  14%|█▍ | 180/1250 [04:42<26:40, 1.50s/it]
187
  14%|█▍ | 181/1250 [04:43<23:54, 1.34s/it]
188
  15%|█▍ | 182/1250 [04:45<28:58, 1.63s/it]
189
  15%|█▍ | 183/1250 [04:47<29:22, 1.65s/it]
190
  15%|█▍ | 184/1250 [04:49<28:39, 1.61s/it]
191
  15%|█▍ | 185/1250 [04:50<27:09, 1.53s/it]
192
  15%|█▍ | 186/1250 [04:51<25:10, 1.42s/it]
193
  15%|█▍ | 187/1250 [04:52<22:49, 1.29s/it]
194
  15%|█▌ | 188/1250 [04:55<29:02, 1.64s/it]
195
  15%|█▌ | 189/1250 [04:56<29:43, 1.68s/it]
196
  15%|█▌ | 190/1250 [04:58<28:55, 1.64s/it]
197
  15%|█▌ | 191/1250 [04:59<27:14, 1.54s/it]
198
  15%|█▌ | 192/1250 [05:00<25:07, 1.43s/it]
199
  15%|█▌ | 193/1250 [05:01<22:42, 1.29s/it]
200
  16%|█▌ | 194/1250 [05:04<27:52, 1.58s/it]
201
  16%|█▌ | 195/1250 [05:05<28:41, 1.63s/it]
202
  16%|█▌ | 196/1250 [05:07<28:06, 1.60s/it]
203
  16%|█▌ | 197/1250 [05:08<27:08, 1.55s/it]
204
  16%|█▌ | 198/1250 [05:09<25:40, 1.46s/it]
205
  16%|█▌ | 199/1250 [05:11<23:23, 1.34s/it]
206
  16%|█▌ | 200/1250 [05:12<26:23, 1.51s/it]
207
 
208
  16%|█▌ | 200/1250 [05:12<26:23, 1.51s/it]
209
  16%|█▌ | 201/1250 [05:15<33:33, 1.92s/it]
210
  16%|█▌ | 202/1250 [05:17<32:28, 1.86s/it]
211
  16%|█▌ | 203/1250 [05:19<30:40, 1.76s/it]
212
  16%|█▋ | 204/1250 [05:20<28:51, 1.66s/it]
213
  16%|█▋ | 205/1250 [05:21<26:31, 1.52s/it]
214
  16%|█▋ | 206/1250 [05:22<23:42, 1.36s/it]
215
  17%|█▋ | 207/1250 [05:25<29:02, 1.67s/it]
216
  17%|█▋ | 208/1250 [05:26<29:31, 1.70s/it]
217
  17%|█▋ | 209/1250 [05:28<28:26, 1.64s/it]
218
  17%|█▋ | 210/1250 [05:29<26:54, 1.55s/it]
219
  17%|█▋ | 211/1250 [05:30<24:56, 1.44s/it]
220
  17%|█▋ | 212/1250 [05:31<22:33, 1.30s/it]
221
  17%|█▋ | 213/1250 [05:34<28:02, 1.62s/it]
222
  17%|█▋ | 214/1250 [05:35<28:39, 1.66s/it]
223
  17%|█▋ | 215/1250 [05:37<27:54, 1.62s/it]
224
  17%|█▋ | 216/1250 [05:38<26:40, 1.55s/it]
225
  17%|█▋ | 217/1250 [05:40<24:52, 1.44s/it]
226
  17%|█▋ | 218/1250 [05:41<22:36, 1.31s/it]
227
  18%|█▊ | 219/1250 [05:43<27:51, 1.62s/it]
228
  18%|█▊ | 220/1250 [05:45<28:23, 1.65s/it]
229
  18%|█▊ | 221/1250 [05:46<27:36, 1.61s/it]
230
  18%|█▊ | 222/1250 [05:47<26:09, 1.53s/it]
231
  18%|█▊ | 223/1250 [05:49<24:20, 1.42s/it]
232
  18%|█▊ | 224/1250 [05:50<22:20, 1.31s/it]
233
  18%|█▊ | 225/1250 [05:52<25:04, 1.47s/it]
234
  18%|█▊ | 226/1250 [05:54<32:16, 1.89s/it]
235
  18%|█▊ | 227/1250 [05:56<31:41, 1.86s/it]
236
  18%|█▊ | 228/1250 [05:58<30:00, 1.76s/it]
237
  18%|█▊ | 229/1250 [05:59<27:50, 1.64s/it]
238
  18%|█▊ | 230/1250 [06:00<25:26, 1.50s/it]
239
  18%|█▊ | 231/1250 [06:01<22:49, 1.34s/it]
240
  19%|█▊ | 232/1250 [06:04<27:42, 1.63s/it]
241
  19%|█▊ | 233/1250 [06:05<28:19, 1.67s/it]
242
  19%|█▊ | 234/1250 [06:07<27:47, 1.64s/it]
243
  19%|█▉ | 235/1250 [06:08<26:41, 1.58s/it]
244
  19%|█▉ | 236/1250 [06:10<25:01, 1.48s/it]
245
  19%|█▉ | 237/1250 [06:11<22:57, 1.36s/it]
246
  19%|█▉ | 238/1250 [06:13<28:17, 1.68s/it]
247
  19%|█▉ | 239/1250 [06:15<28:30, 1.69s/it]
248
  19%|█▉ | 240/1250 [06:16<27:31, 1.64s/it]
249
  19%|█▉ | 241/1250 [06:18<25:47, 1.53s/it]
250
  19%|█▉ | 242/1250 [06:19<23:55, 1.42s/it]
251
  19%|█▉ | 243/1250 [06:20<21:42, 1.29s/it]
252
  20%|█▉ | 244/1250 [06:22<27:29, 1.64s/it]
253
  20%|█▉ | 245/1250 [06:24<27:55, 1.67s/it]
254
  20%|█▉ | 246/1250 [06:25<27:02, 1.62s/it]
255
  20%|█▉ | 247/1250 [06:27<25:41, 1.54s/it]
256
  20%|█▉ | 248/1250 [06:28<23:48, 1.43s/it]
257
  20%|█▉ | 249/1250 [06:29<21:24, 1.28s/it]
258
  20%|██ | 250/1250 [06:31<23:19, 1.40s/it]
259
  20%|██ | 251/1250 [06:34<31:22, 1.88s/it]
260
  20%|██ | 252/1250 [06:35<30:47, 1.85s/it]
261
  20%|██ | 253/1250 [06:37<28:53, 1.74s/it]
262
  20%|██ | 254/1250 [06:38<26:57, 1.62s/it]
263
  20%|██ | 255/1250 [06:39<24:37, 1.48s/it]
264
  20%|██ | 256/1250 [06:40<22:05, 1.33s/it]
265
  21%|██ | 257/1250 [06:43<27:33, 1.66s/it]
266
  21%|██ | 258/1250 [06:45<28:01, 1.69s/it]
267
  21%|██ | 259/1250 [06:46<27:31, 1.67s/it]
268
  21%|██ | 260/1250 [06:48<26:14, 1.59s/it]
269
  21%|██ | 261/1250 [06:49<24:25, 1.48s/it]
270
  21%|██ | 262/1250 [06:50<21:49, 1.33s/it]
271
  21%|██ | 263/1250 [06:52<26:46, 1.63s/it]
272
  21%|██ | 264/1250 [06:54<27:14, 1.66s/it]
273
  21%|██ | 265/1250 [06:55<26:29, 1.61s/it]
274
  21%|██▏ | 266/1250 [06:57<25:03, 1.53s/it]
275
  21%|██▏ | 267/1250 [06:58<23:19, 1.42s/it]
276
  21%|██▏ | 268/1250 [06:59<21:12, 1.30s/it]
277
  22%|██▏ | 269/1250 [07:01<25:45, 1.58s/it]
278
  22%|██▏ | 270/1250 [07:03<26:41, 1.63s/it]
279
  22%|██▏ | 271/1250 [07:04<26:09, 1.60s/it]
280
  22%|██▏ | 272/1250 [07:06<24:53, 1.53s/it]
281
  22%|██▏ | 273/1250 [07:07<23:15, 1.43s/it]
282
  22%|██▏ | 274/1250 [07:08<21:06, 1.30s/it]
283
  22%|██▏ | 275/1250 [07:10<22:48, 1.40s/it]
284
  22%|██▏ | 276/1250 [07:12<29:24, 1.81s/it]
285
  22%|██▏ | 277/1250 [07:14<29:19, 1.81s/it]
286
  22%|██▏ | 278/1250 [07:16<27:58, 1.73s/it]
287
  22%|██▏ | 279/1250 [07:17<26:26, 1.63s/it]
288
  22%|██▏ | 280/1250 [07:18<24:16, 1.50s/it]
289
  22%|██▏ | 281/1250 [07:19<21:57, 1.36s/it]
290
  23%|██▎ | 282/1250 [07:22<26:51, 1.66s/it]
291
  23%|██▎ | 283/1250 [07:23<27:18, 1.69s/it]
292
  23%|██▎ | 284/1250 [07:25<26:33, 1.65s/it]
293
  23%|██▎ | 285/1250 [07:26<25:13, 1.57s/it]
294
  23%|██▎ | 286/1250 [07:28<23:27, 1.46s/it]
295
  23%|██▎ | 287/1250 [07:29<21:21, 1.33s/it]
296
  23%|██▎ | 288/1250 [07:31<26:40, 1.66s/it]
297
  23%|██▎ | 289/1250 [07:33<26:49, 1.68s/it]
298
  23%|██▎ | 290/1250 [07:34<26:15, 1.64s/it]
299
  23%|██▎ | 291/1250 [07:36<24:52, 1.56s/it]
300
  23%|██▎ | 292/1250 [07:37<22:56, 1.44s/it]
301
  23%|██▎ | 293/1250 [07:38<20:36, 1.29s/it]
302
  24%|██▎ | 294/1250 [07:40<25:20, 1.59s/it]
303
  24%|██▎ | 295/1250 [07:42<25:45, 1.62s/it]
304
  24%|██▎ | 296/1250 [07:43<24:59, 1.57s/it]
305
  24%|██▍ | 297/1250 [07:44<23:42, 1.49s/it]
306
  24%|██▍ | 298/1250 [07:46<22:08, 1.40s/it]
307
  24%|██▍ | 299/1250 [07:47<20:04, 1.27s/it]
308
  24%|██▍ | 300/1250 [07:48<22:58, 1.45s/it]
309
 
310
  24%|██▍ | 300/1250 [07:48<22:58, 1.45s/it]
311
  24%|██▍ | 301/1250 [07:51<29:17, 1.85s/it]
312
  24%|██▍ | 302/1250 [07:53<28:36, 1.81s/it]
313
  24%|██▍ | 303/1250 [07:55<27:16, 1.73s/it]
314
  24%|██▍ | 304/1250 [07:56<25:46, 1.63s/it]
315
  24%|██▍ | 305/1250 [07:57<23:38, 1.50s/it]
316
  24%|██▍ | 306/1250 [07:58<21:16, 1.35s/it]
317
  25%|██▍ | 307/1250 [08:00<25:45, 1.64s/it]
318
  25%|██▍ | 308/1250 [08:02<26:24, 1.68s/it]
319
  25%|██▍ | 309/1250 [08:04<25:52, 1.65s/it]
320
  25%|██▍ | 310/1250 [08:05<24:30, 1.56s/it]
321
  25%|██▍ | 311/1250 [08:06<22:52, 1.46s/it]
322
  25%|██▍ | 312/1250 [08:07<20:47, 1.33s/it]
323
  25%|██▌ | 313/1250 [08:10<26:16, 1.68s/it]
324
  25%|██▌ | 314/1250 [08:12<26:45, 1.72s/it]
325
  25%|██▌ | 315/1250 [08:13<25:53, 1.66s/it]
326
  25%|██▌ | 316/1250 [08:15<24:24, 1.57s/it]
327
  25%|██▌ | 317/1250 [08:16<22:36, 1.45s/it]
328
  25%|██▌ | 318/1250 [08:17<20:24, 1.31s/it]
329
  26%|██▌ | 319/1250 [08:19<25:10, 1.62s/it]
330
  26%|██▌ | 320/1250 [08:21<25:30, 1.65s/it]
331
  26%|██▌ | 321/1250 [08:22<24:42, 1.60s/it]
332
  26%|██▌ | 322/1250 [08:24<23:36, 1.53s/it]
333
  26%|██▌ | 323/1250 [08:25<21:59, 1.42s/it]
334
  26%|██▌ | 324/1250 [08:26<19:52, 1.29s/it]
335
  26%|██▌ | 325/1250 [08:28<22:51, 1.48s/it]
336
  26%|██▌ | 326/1250 [08:31<28:59, 1.88s/it]
337
  26%|██▌ | 327/1250 [08:32<28:15, 1.84s/it]
338
  26%|██▌ | 328/1250 [08:34<26:38, 1.73s/it]
339
  26%|██▋ | 329/1250 [08:35<24:51, 1.62s/it]
340
  26%|██▋ | 330/1250 [08:36<22:40, 1.48s/it]
341
  26%|██▋ | 331/1250 [08:37<20:21, 1.33s/it]
342
  27%|██▋ | 332/1250 [08:40<24:40, 1.61s/it]
343
  27%|██▋ | 333/1250 [08:41<25:06, 1.64s/it]
344
  27%|██▋ | 334/1250 [08:43<24:38, 1.61s/it]
345
  27%|██▋ | 335/1250 [08:44<23:33, 1.54s/it]
346
  27%|██▋ | 336/1250 [08:45<21:48, 1.43s/it]
347
  27%|██▋ | 337/1250 [08:46<19:37, 1.29s/it]
348
  27%|██▋ | 338/1250 [08:49<24:31, 1.61s/it]
349
  27%|██▋ | 339/1250 [08:50<25:13, 1.66s/it]
350
  27%|██▋ | 340/1250 [08:52<24:29, 1.61s/it]
351
  27%|██▋ | 341/1250 [08:53<23:28, 1.55s/it]
352
  27%|██▋ | 342/1250 [08:55<21:50, 1.44s/it]
353
  27%|██▋ | 343/1250 [08:56<20:01, 1.32s/it]
354
  28%|██▊ | 344/1250 [08:58<24:53, 1.65s/it]
355
  28%|██▊ | 345/1250 [09:00<25:11, 1.67s/it]
356
  28%|██▊ | 346/1250 [09:01<24:32, 1.63s/it]
357
  28%|██▊ | 347/1250 [09:03<23:18, 1.55s/it]
358
  28%|██▊ | 348/1250 [09:04<21:27, 1.43s/it]
359
  28%|██▊ | 349/1250 [09:05<19:20, 1.29s/it]
360
  28%|██▊ | 350/1250 [09:07<22:39, 1.51s/it]
361
  28%|██▊ | 351/1250 [09:10<29:06, 1.94s/it]
362
  28%|██▊ | 352/1250 [09:12<28:24, 1.90s/it]
363
  28%|██▊ | 353/1250 [09:13<26:48, 1.79s/it]
364
  28%|██▊ | 354/1250 [09:14<24:54, 1.67s/it]
365
  28%|██▊ | 355/1250 [09:16<22:40, 1.52s/it]
366
  28%|██▊ | 356/1250 [09:17<20:29, 1.38s/it]
367
  29%|██▊ | 357/1250 [09:19<24:22, 1.64s/it]
368
  29%|██▊ | 358/1250 [09:21<24:35, 1.65s/it]
369
  29%|██▊ | 359/1250 [09:22<23:54, 1.61s/it]
370
  29%|██▉ | 360/1250 [09:23<22:44, 1.53s/it]
371
  29%|██▉ | 361/1250 [09:25<21:14, 1.43s/it]
372
  29%|██▉ | 362/1250 [09:26<19:13, 1.30s/it]
373
  29%|██▉ | 363/1250 [09:28<23:56, 1.62s/it]
374
  29%|██▉ | 364/1250 [09:30<24:15, 1.64s/it]
375
  29%|██▉ | 365/1250 [09:31<23:39, 1.60s/it]
376
  29%|██▉ | 366/1250 [09:33<22:30, 1.53s/it]
377
  29%|██▉ | 367/1250 [09:34<21:11, 1.44s/it]
378
  29%|██▉ | 368/1250 [09:35<19:05, 1.30s/it]
379
  30%|██▉ | 369/1250 [09:37<24:00, 1.63s/it]
380
  30%|██▉ | 370/1250 [09:39<24:30, 1.67s/it]
381
  30%|██▉ | 371/1250 [09:40<23:53, 1.63s/it]
382
  30%|██▉ | 372/1250 [09:42<22:41, 1.55s/it]
383
  30%|██▉ | 373/1250 [09:43<20:58, 1.44s/it]
384
  30%|██▉ | 374/1250 [09:44<18:54, 1.30s/it]
385
  30%|███ | 375/1250 [09:46<20:17, 1.39s/it]
386
  30%|███ | 376/1250 [09:48<26:28, 1.82s/it]
387
  30%|███ | 377/1250 [09:50<25:49, 1.78s/it]
388
  30%|███ | 378/1250 [09:52<24:25, 1.68s/it]
389
  30%|███ | 379/1250 [09:53<22:51, 1.57s/it]
390
  30%|███ | 380/1250 [09:54<21:04, 1.45s/it]
391
  30%|███ | 381/1250 [09:55<18:55, 1.31s/it]
392
  31%|███ | 382/1250 [09:57<23:55, 1.65s/it]
393
  31%|███ | 383/1250 [09:59<24:26, 1.69s/it]
394
  31%|███ | 384/1250 [10:01<23:44, 1.64s/it]
395
  31%|███ | 385/1250 [10:02<22:36, 1.57s/it]
396
  31%|███ | 386/1250 [10:03<21:00, 1.46s/it]
397
  31%|███ | 387/1250 [10:04<19:05, 1.33s/it]
398
  31%|███ | 388/1250 [10:07<22:40, 1.58s/it]
399
  31%|███ | 389/1250 [10:08<23:05, 1.61s/it]
400
  31%|███ | 390/1250 [10:10<22:56, 1.60s/it]
401
  31%|███▏ | 391/1250 [10:11<22:10, 1.55s/it]
402
  31%|███▏ | 392/1250 [10:12<20:33, 1.44s/it]
403
  31%|███▏ | 393/1250 [10:13<18:37, 1.30s/it]
404
  32%|███▏ | 394/1250 [10:16<23:25, 1.64s/it]
405
  32%|███▏ | 395/1250 [10:18<23:45, 1.67s/it]
406
  32%|███▏ | 396/1250 [10:19<23:08, 1.63s/it]
407
  32%|███▏ | 397/1250 [10:20<21:55, 1.54s/it]
408
  32%|███▏ | 398/1250 [10:22<20:16, 1.43s/it]
409
  32%|███▏ | 399/1250 [10:23<18:21, 1.29s/it]
410
  32%|███▏ | 400/1250 [10:24<20:34, 1.45s/it]
411
 
412
  32%|███▏ | 400/1250 [10:24<20:34, 1.45s/it]
413
  32%|███▏ | 401/1250 [10:27<26:02, 1.84s/it]
414
  32%|███▏ | 402/1250 [10:29<25:43, 1.82s/it]
415
  32%|███▏ | 403/1250 [10:31<24:36, 1.74s/it]
416
  32%|███▏ | 404/1250 [10:32<22:58, 1.63s/it]
417
  32%|███▏ | 405/1250 [10:33<21:12, 1.51s/it]
418
  32%|███▏ | 406/1250 [10:34<19:08, 1.36s/it]
419
  33%|███▎ | 407/1250 [10:37<23:55, 1.70s/it]
420
  33%|███▎ | 408/1250 [10:38<24:11, 1.72s/it]
421
  33%|███▎ | 409/1250 [10:40<23:27, 1.67s/it]
422
  33%|███▎ | 410/1250 [10:41<22:04, 1.58s/it]
423
  33%|███▎ | 411/1250 [10:42<20:25, 1.46s/it]
424
  33%|███▎ | 412/1250 [10:43<18:26, 1.32s/it]
425
  33%|███▎ | 413/1250 [10:46<22:59, 1.65s/it]
426
  33%|███▎ | 414/1250 [10:48<23:20, 1.68s/it]
427
  33%|███▎ | 415/1250 [10:49<22:33, 1.62s/it]
428
  33%|███▎ | 416/1250 [10:50<21:15, 1.53s/it]
429
  33%|███▎ | 417/1250 [10:52<19:25, 1.40s/it]
430
  33%|███▎ | 418/1250 [10:52<17:26, 1.26s/it]
431
  34%|███▎ | 419/1250 [10:55<21:48, 1.57s/it]
432
  34%|███▎ | 420/1250 [10:56<22:18, 1.61s/it]
433
  34%|███▎ | 421/1250 [10:58<21:51, 1.58s/it]
434
  34%|███▍ | 422/1250 [10:59<21:12, 1.54s/it]
435
  34%|███▍ | 423/1250 [11:01<19:59, 1.45s/it]
436
  34%|███▍ | 424/1250 [11:02<18:08, 1.32s/it]
437
  34%|███▍ | 425/1250 [11:03<19:17, 1.40s/it]
438
  34%|███▍ | 426/1250 [11:06<25:20, 1.85s/it]
439
  34%|███▍ | 427/1250 [11:08<24:54, 1.82s/it]
440
  34%|███▍ | 428/1250 [11:09<23:45, 1.73s/it]
441
  34%|███▍ | 429/1250 [11:11<22:22, 1.64s/it]
442
  34%|███▍ | 430/1250 [11:12<20:29, 1.50s/it]
443
  34%|███▍ | 431/1250 [11:13<18:27, 1.35s/it]
444
  35%|███▍ | 432/1250 [11:15<21:54, 1.61s/it]
445
  35%|███▍ | 433/1250 [11:17<22:24, 1.65s/it]
446
  35%|███▍ | 434/1250 [11:18<21:43, 1.60s/it]
447
  35%|███▍ | 435/1250 [11:20<20:37, 1.52s/it]
448
  35%|███▍ | 436/1250 [11:21<19:27, 1.43s/it]
449
  35%|███▍ | 437/1250 [11:22<17:33, 1.30s/it]
450
  35%|███▌ | 438/1250 [11:24<21:45, 1.61s/it]
451
  35%|███▌ | 439/1250 [11:26<22:22, 1.65s/it]
452
  35%|███▌ | 440/1250 [11:28<21:56, 1.62s/it]
453
  35%|███▌ | 441/1250 [11:29<21:01, 1.56s/it]
454
  35%|███▌ | 442/1250 [11:30<19:33, 1.45s/it]
455
  35%|███▌ | 443/1250 [11:31<17:42, 1.32s/it]
456
  36%|███▌ | 444/1250 [11:34<22:11, 1.65s/it]
457
  36%|███▌ | 445/1250 [11:35<22:31, 1.68s/it]
458
  36%|███▌ | 446/1250 [11:37<22:06, 1.65s/it]
459
  36%|███▌ | 447/1250 [11:38<20:52, 1.56s/it]
460
  36%|███▌ | 448/1250 [11:40<19:07, 1.43s/it]
461
  36%|███▌ | 449/1250 [11:40<17:11, 1.29s/it]
462
  36%|███▌ | 450/1250 [11:42<18:49, 1.41s/it]
463
  36%|███▌ | 451/1250 [11:45<24:40, 1.85s/it]
464
  36%|███▌ | 452/1250 [11:47<24:24, 1.84s/it]
465
  36%|███▌ | 453/1250 [11:48<23:15, 1.75s/it]
466
  36%|███▋ | 454/1250 [11:50<21:50, 1.65s/it]
467
  36%|███▋ | 455/1250 [11:51<20:05, 1.52s/it]
468
  36%|███▋ | 456/1250 [11:52<17:55, 1.35s/it]
469
  37%|███▋ | 457/1250 [11:54<22:22, 1.69s/it]
470
  37%|███▋ | 458/1250 [11:56<22:42, 1.72s/it]
471
  37%|███▋ | 459/1250 [11:58<21:46, 1.65s/it]
472
  37%|███▋ | 460/1250 [11:59<20:25, 1.55s/it]
473
  37%|███▋ | 461/1250 [12:00<18:57, 1.44s/it]
474
  37%|███▋ | 462/1250 [12:01<17:05, 1.30s/it]
475
  37%|███▋ | 463/1250 [12:03<20:37, 1.57s/it]
476
  37%|███▋ | 464/1250 [12:05<21:05, 1.61s/it]
477
  37%|███▋ | 465/1250 [12:07<20:35, 1.57s/it]
478
  37%|███▋ | 466/1250 [12:08<19:39, 1.50s/it]
479
  37%|███▋ | 467/1250 [12:09<18:22, 1.41s/it]
480
  37%|███▋ | 468/1250 [12:10<16:57, 1.30s/it]
481
  38%|███▊ | 469/1250 [12:12<20:46, 1.60s/it]
482
  38%|███▊ | 470/1250 [12:14<21:07, 1.62s/it]
483
  38%|███▊ | 471/1250 [12:16<20:47, 1.60s/it]
484
  38%|███▊ | 472/1250 [12:17<19:43, 1.52s/it]
485
  38%|███▊ | 473/1250 [12:18<18:18, 1.41s/it]
486
  38%|███▊ | 474/1250 [12:19<16:37, 1.29s/it]
487
  38%|███▊ | 475/1250 [12:21<18:49, 1.46s/it]
488
  38%|███▊ | 476/1250 [12:24<24:27, 1.90s/it]
489
  38%|███▊ | 477/1250 [12:26<24:00, 1.86s/it]
490
  38%|███▊ | 478/1250 [12:27<22:39, 1.76s/it]
491
  38%|███▊ | 479/1250 [12:29<21:01, 1.64s/it]
492
  38%|███▊ | 480/1250 [12:30<18:58, 1.48s/it]
493
  38%|███▊ | 481/1250 [12:31<16:44, 1.31s/it]
494
  39%|███▊ | 482/1250 [12:33<20:30, 1.60s/it]
495
  39%|███▊ | 483/1250 [12:35<20:39, 1.62s/it]
496
  39%|███▊ | 484/1250 [12:36<20:30, 1.61s/it]
497
  39%|███▉ | 485/1250 [12:38<19:36, 1.54s/it]
498
  39%|███▉ | 486/1250 [12:39<18:15, 1.43s/it]
499
  39%|███▉ | 487/1250 [12:40<16:37, 1.31s/it]
500
  39%|███▉ | 488/1250 [12:42<20:55, 1.65s/it]
501
  39%|███▉ | 489/1250 [12:44<21:17, 1.68s/it]
502
  39%|███▉ | 490/1250 [12:45<20:34, 1.62s/it]
503
  39%|███▉ | 491/1250 [12:47<19:28, 1.54s/it]
504
  39%|███▉ | 492/1250 [12:48<18:28, 1.46s/it]
505
  39%|███▉ | 493/1250 [12:49<16:39, 1.32s/it]
506
  40%|███▉ | 494/1250 [12:51<20:23, 1.62s/it]
507
  40%|███▉ | 495/1250 [12:53<20:39, 1.64s/it]
508
  40%|███▉ | 496/1250 [12:55<20:11, 1.61s/it]
509
  40%|███▉ | 497/1250 [12:56<19:09, 1.53s/it]
510
  40%|███▉ | 498/1250 [12:57<17:44, 1.42s/it]
511
  40%|███▉ | 499/1250 [12:58<15:57, 1.28s/it]
512
  40%|████ | 500/1250 [13:00<17:17, 1.38s/it]
513
 
514
  40%|████ | 500/1250 [13:00<17:17, 1.38s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.
 
 
 
 
 
 
 
 
 
 
515
  0%| | 0/43 [00:00<?, ?it/s]
 
516
  5%|▍ | 2/43 [00:00<00:04, 9.99it/s]
 
517
  7%|▋ | 3/43 [00:00<00:07, 5.48it/s]
 
518
  9%|▉ | 4/43 [00:00<00:09, 4.26it/s]
 
519
  12%|█▏ | 5/43 [00:01<00:09, 4.09it/s]
 
520
  14%|█▍ | 6/43 [00:01<00:09, 3.90it/s]
 
521
  16%|█▋ | 7/43 [00:01<00:09, 3.85it/s]
 
522
  19%|█▊ | 8/43 [00:01<00:09, 3.77it/s]
 
523
  21%|██ | 9/43 [00:02<00:09, 3.47it/s]
 
524
  23%|██▎ | 10/43 [00:02<00:08, 3.74it/s]
 
525
  26%|██▌ | 11/43 [00:02<00:09, 3.54it/s]
 
526
  28%|██▊ | 12/43 [00:03<00:08, 3.77it/s]
 
527
  30%|███ | 13/43 [00:03<00:07, 3.86it/s]
 
528
  33%|███▎ | 14/43 [00:03<00:08, 3.48it/s]
 
529
  35%|███▍ | 15/43 [00:04<00:09, 2.99it/s]
 
530
  37%|███▋ | 16/43 [00:04<00:08, 3.05it/s]
 
531
  40%|███▉ | 17/43 [00:04<00:08, 3.14it/s]
 
532
  42%|████▏ | 18/43 [00:05<00:08, 3.05it/s]
 
533
  44%|████▍ | 19/43 [00:05<00:08, 2.85it/s]
 
534
  47%|████▋ | 20/43 [00:05<00:09, 2.54it/s]
 
535
  49%|████▉ | 21/43 [00:06<00:08, 2.66it/s]
 
536
  51%|█████ | 22/43 [00:06<00:07, 2.70it/s]
 
537
  53%|█████▎ | 23/43 [00:06<00:07, 2.82it/s]
 
538
  56%|█████▌ | 24/43 [00:07<00:06, 3.09it/s]
 
539
  58%|█████▊ | 25/43 [00:07<00:05, 3.22it/s]
 
540
  60%|██████ | 26/43 [00:07<00:05, 3.21it/s]
 
541
  63%|██████▎ | 27/43 [00:07<00:04, 3.59it/s]
 
542
  65%|██████▌ | 28/43 [00:08<00:04, 3.65it/s]
 
543
  67%|██████▋ | 29/43 [00:08<00:03, 3.76it/s]
 
544
  70%|██████▉ | 30/43 [00:08<00:03, 3.83it/s]
 
545
  72%|███████▏ | 31/43 [00:08<00:03, 3.79it/s]
 
546
  74%|███████▍ | 32/43 [00:09<00:02, 3.85it/s]
 
547
  77%|███████▋ | 33/43 [00:09<00:02, 3.54it/s]
 
548
  79%|███████▉ | 34/43 [00:09<00:02, 3.51it/s]
 
549
  81%|████████▏ | 35/43 [00:10<00:02, 3.51it/s]
 
550
  84%|████████▎ | 36/43 [00:10<00:02, 3.42it/s]
 
551
  86%|████████▌ | 37/43 [00:10<00:01, 3.56it/s]
 
552
  88%|████████▊ | 38/43 [00:11<00:01, 3.32it/s]
 
553
  91%|█████████ | 39/43 [00:11<00:01, 3.38it/s]
 
554
  93%|█████████▎| 40/43 [00:11<00:00, 3.27it/s]
 
555
  95%|█████████▌| 41/43 [00:11<00:00, 3.37it/s]
 
556
  98%|█████████▊| 42/43 [00:12<00:00, 3.19it/s]
 
557
 
 
558
 
559
  40%|████ | 500/1250 [13:13<17:17, 1.38s/it]
 
 
560
  Saving model checkpoint to ./checkpoint-500
 
 
 
 
 
1
+ 02/02/2022 18:04:15 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: True
2
+ 02/02/2022 18:04:15 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
3
+ _n_gpu=1,
4
+ adafactor=False,
5
+ adam_beta1=0.9,
6
+ adam_beta2=0.999,
7
+ adam_epsilon=1e-08,
8
+ bf16=False,
9
+ bf16_full_eval=False,
10
+ dataloader_drop_last=False,
11
+ dataloader_num_workers=0,
12
+ dataloader_pin_memory=True,
13
+ ddp_bucket_cap_mb=None,
14
+ ddp_find_unused_parameters=None,
15
+ debug=[],
16
+ deepspeed=None,
17
+ disable_tqdm=False,
18
+ do_eval=True,
19
+ do_predict=False,
20
+ do_train=True,
21
+ eval_accumulation_steps=None,
22
+ eval_steps=500,
23
+ evaluation_strategy=IntervalStrategy.STEPS,
24
+ fp16=True,
25
+ fp16_backend=auto,
26
+ fp16_full_eval=False,
27
+ fp16_opt_level=O1,
28
+ gradient_accumulation_steps=4,
29
+ gradient_checkpointing=True,
30
+ greater_is_better=None,
31
+ group_by_length=True,
32
+ half_precision_backend=auto,
33
+ hub_model_id=None,
34
+ hub_strategy=HubStrategy.EVERY_SAVE,
35
+ hub_token=<HUB_TOKEN>,
36
+ ignore_data_skip=False,
37
+ label_names=None,
38
+ label_smoothing_factor=0.0,
39
+ learning_rate=7.5e-05,
40
+ length_column_name=input_length,
41
+ load_best_model_at_end=False,
42
+ local_rank=-1,
43
+ log_level=-1,
44
+ log_level_replica=-1,
45
+ log_on_each_node=True,
46
+ logging_dir=./runs/Feb02_18-04-15_job-86e1d453-0156-4b77-a98d-7d457c737175,
47
+ logging_first_step=False,
48
+ logging_nan_inf_filter=True,
49
+ logging_steps=100,
50
+ logging_strategy=IntervalStrategy.STEPS,
51
+ lr_scheduler_type=SchedulerType.LINEAR,
52
+ max_grad_norm=1.0,
53
+ max_steps=-1,
54
+ metric_for_best_model=None,
55
+ mp_parameters=,
56
+ no_cuda=False,
57
+ num_train_epochs=50.0,
58
+ optim=OptimizerNames.ADAMW_HF,
59
+ output_dir=./,
60
+ overwrite_output_dir=True,
61
+ past_index=-1,
62
+ per_device_eval_batch_size=8,
63
+ per_device_train_batch_size=8,
64
+ prediction_loss_only=False,
65
+ push_to_hub=True,
66
+ push_to_hub_model_id=None,
67
+ push_to_hub_organization=None,
68
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
69
+ remove_unused_columns=True,
70
+ report_to=[],
71
+ resume_from_checkpoint=None,
72
+ run_name=./,
73
+ save_on_each_node=False,
74
+ save_steps=500,
75
+ save_strategy=IntervalStrategy.STEPS,
76
+ save_total_limit=3,
77
+ seed=42,
78
+ sharded_ddp=[],
79
+ skip_memory_metrics=True,
80
+ tf32=None,
81
+ tpu_metrics_debug=False,
82
+ tpu_num_cores=None,
83
+ use_legacy_prediction_loop=False,
84
+ warmup_ratio=0.0,
85
+ warmup_steps=2000,
86
+ weight_decay=0.0,
87
+ xpu_backend=None,
88
+ )
89
+ 02/02/2022 18:04:18 - WARNING - datasets.builder - Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/ur/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8)
90
+ 02/02/2022 18:04:20 - WARNING - datasets.builder - Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/ur/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8)
91
+ 02/02/2022 18:04:20 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/ur/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8/cache-eefb1dcecdbc6361.arrow
92
+ 02/02/2022 18:04:20 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/ur/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8/cache-bebf53ae59038f0e.arrow
93
+ loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6
94
+ Model config Wav2Vec2Config {
95
+ "_name_or_path": "facebook/wav2vec2-xls-r-300m",
96
+ "activation_dropout": 0.0,
97
+ "adapter_kernel_size": 3,
98
+ "adapter_stride": 2,
99
+ "add_adapter": false,
100
+ "apply_spec_augment": true,
101
+ "architectures": [
102
+ "Wav2Vec2ForPreTraining"
103
+ ],
104
+ "attention_dropout": 0.1,
105
+ "bos_token_id": 1,
106
+ "classifier_proj_size": 256,
107
+ "codevector_dim": 768,
108
+ "contrastive_logits_temperature": 0.1,
109
+ "conv_bias": true,
110
+ "conv_dim": [
111
+ 512,
112
+ 512,
113
+ 512,
114
+ 512,
115
+ 512,
116
+ 512,
117
+ 512
118
+ ],
119
+ "conv_kernel": [
120
+ 10,
121
+ 3,
122
+ 3,
123
+ 3,
124
+ 3,
125
+ 2,
126
+ 2
127
+ ],
128
+ "conv_stride": [
129
+ 5,
130
+ 2,
131
+ 2,
132
+ 2,
133
+ 2,
134
+ 2,
135
+ 2
136
+ ],
137
+ "ctc_loss_reduction": "sum",
138
+ "ctc_zero_infinity": false,
139
+ "diversity_loss_weight": 0.1,
140
+ "do_stable_layer_norm": true,
141
+ "eos_token_id": 2,
142
+ "feat_extract_activation": "gelu",
143
+ "feat_extract_dropout": 0.0,
144
+ "feat_extract_norm": "layer",
145
+ "feat_proj_dropout": 0.1,
146
+ "feat_quantizer_dropout": 0.0,
147
+ "final_dropout": 0.0,
148
+ "gradient_checkpointing": false,
149
+ "hidden_act": "gelu",
150
+ "hidden_dropout": 0.1,
151
+ "hidden_size": 1024,
152
+ "initializer_range": 0.02,
153
+ "intermediate_size": 4096,
154
+ "layer_norm_eps": 1e-05,
155
+ "layerdrop": 0.1,
156
+ "mask_feature_length": 10,
157
+ "mask_feature_min_masks": 0,
158
+ "mask_feature_prob": 0.0,
159
+ "mask_time_length": 10,
160
+ "mask_time_min_masks": 2,
161
+ "mask_time_prob": 0.075,
162
+ "model_type": "wav2vec2",
163
+ "num_adapter_layers": 3,
164
+ "num_attention_heads": 16,
165
+ "num_codevector_groups": 2,
166
+ "num_codevectors_per_group": 320,
167
+ "num_conv_pos_embedding_groups": 16,
168
+ "num_conv_pos_embeddings": 128,
169
+ "num_feat_extract_layers": 7,
170
+ "num_hidden_layers": 24,
171
+ "num_negatives": 100,
172
+ "output_hidden_size": 1024,
173
+ "pad_token_id": 0,
174
+ "proj_codevector_dim": 768,
175
+ "tdnn_dilation": [
176
+ 1,
177
+ 2,
178
+ 3,
179
+ 1,
180
+ 1
181
+ ],
182
+ "tdnn_dim": [
183
+ 512,
184
+ 512,
185
+ 512,
186
+ 512,
187
+ 1500
188
+ ],
189
+ "tdnn_kernel": [
190
+ 5,
191
+ 3,
192
+ 3,
193
+ 1,
194
+ 1
195
+ ],
196
+ "torch_dtype": "float32",
197
+ "transformers_version": "4.17.0.dev0",
198
+ "use_weighted_layer_sum": false,
199
+ "vocab_size": 32,
200
+ "xvector_output_dim": 512
201
+ }
202
+
203
+
204
  0%| | 0/1 [00:00<?, ?ba/s]
205
+
206
  0%| | 0/1 [00:00<?, ?ba/s]
207
+ Didn't find file ./tokenizer.json. We won't load it.
208
+ loading file ./vocab.json
209
+ loading file ./tokenizer_config.json
210
+ loading file ./added_tokens.json
211
+ loading file ./special_tokens_map.json
212
+ loading file None
213
+ Adding <s> to the vocabulary
214
+ Adding </s> to the vocabulary
215
+ loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6
216
+ Model config Wav2Vec2Config {
217
+ "_name_or_path": "facebook/wav2vec2-xls-r-300m",
218
+ "activation_dropout": 0.0,
219
+ "adapter_kernel_size": 3,
220
+ "adapter_stride": 2,
221
+ "add_adapter": false,
222
+ "apply_spec_augment": true,
223
+ "architectures": [
224
+ "Wav2Vec2ForPreTraining"
225
+ ],
226
+ "attention_dropout": 0.1,
227
+ "bos_token_id": 1,
228
+ "classifier_proj_size": 256,
229
+ "codevector_dim": 768,
230
+ "contrastive_logits_temperature": 0.1,
231
+ "conv_bias": true,
232
+ "conv_dim": [
233
+ 512,
234
+ 512,
235
+ 512,
236
+ 512,
237
+ 512,
238
+ 512,
239
+ 512
240
+ ],
241
+ "conv_kernel": [
242
+ 10,
243
+ 3,
244
+ 3,
245
+ 3,
246
+ 3,
247
+ 2,
248
+ 2
249
+ ],
250
+ "conv_stride": [
251
+ 5,
252
+ 2,
253
+ 2,
254
+ 2,
255
+ 2,
256
+ 2,
257
+ 2
258
+ ],
259
+ "ctc_loss_reduction": "sum",
260
+ "ctc_zero_infinity": false,
261
+ "diversity_loss_weight": 0.1,
262
+ "do_stable_layer_norm": true,
263
+ "eos_token_id": 2,
264
+ "feat_extract_activation": "gelu",
265
+ "feat_extract_dropout": 0.0,
266
+ "feat_extract_norm": "layer",
267
+ "feat_proj_dropout": 0.1,
268
+ "feat_quantizer_dropout": 0.0,
269
+ "final_dropout": 0.0,
270
+ "gradient_checkpointing": false,
271
+ "hidden_act": "gelu",
272
+ "hidden_dropout": 0.1,
273
+ "hidden_size": 1024,
274
+ "initializer_range": 0.02,
275
+ "intermediate_size": 4096,
276
+ "layer_norm_eps": 1e-05,
277
+ "layerdrop": 0.1,
278
+ "mask_feature_length": 10,
279
+ "mask_feature_min_masks": 0,
280
+ "mask_feature_prob": 0.0,
281
+ "mask_time_length": 10,
282
+ "mask_time_min_masks": 2,
283
+ "mask_time_prob": 0.075,
284
+ "model_type": "wav2vec2",
285
+ "num_adapter_layers": 3,
286
+ "num_attention_heads": 16,
287
+ "num_codevector_groups": 2,
288
+ "num_codevectors_per_group": 320,
289
+ "num_conv_pos_embedding_groups": 16,
290
+ "num_conv_pos_embeddings": 128,
291
+ "num_feat_extract_layers": 7,
292
+ "num_hidden_layers": 24,
293
+ "num_negatives": 100,
294
+ "output_hidden_size": 1024,
295
+ "pad_token_id": 0,
296
+ "proj_codevector_dim": 768,
297
+ "tdnn_dilation": [
298
+ 1,
299
+ 2,
300
+ 3,
301
+ 1,
302
+ 1
303
+ ],
304
+ "tdnn_dim": [
305
+ 512,
306
+ 512,
307
+ 512,
308
+ 512,
309
+ 1500
310
+ ],
311
+ "tdnn_kernel": [
312
+ 5,
313
+ 3,
314
+ 3,
315
+ 1,
316
+ 1
317
+ ],
318
+ "torch_dtype": "float32",
319
+ "transformers_version": "4.17.0.dev0",
320
+ "use_weighted_layer_sum": false,
321
+ "vocab_size": 32,
322
+ "xvector_output_dim": 512
323
+ }
324
+
325
+ loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/preprocessor_config.json from cache at /workspace/.cache/huggingface/transformers/6fb028b95b394059e7d3b367bbca2382b576c66aebe896f04d2cd34e1b575f5b.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326
326
+ Feature extractor Wav2Vec2FeatureExtractor {
327
+ "do_normalize": true,
328
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
329
+ "feature_size": 1,
330
+ "padding_side": "right",
331
+ "padding_value": 0,
332
+ "return_attention_mask": true,
333
+ "sampling_rate": 16000
334
+ }
335
+
336
+ loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/pytorch_model.bin from cache at /workspace/.cache/huggingface/transformers/1e6a6507f3b689035cd4b247e2a37c154e27f39143f31357a49b4e38baeccc36.1edb32803799e27ed554eb7dd935f6745b1a0b17b0ea256442fe24db6eb546cd
337
+ Some weights of the model checkpoint at facebook/wav2vec2-xls-r-300m were not used when initializing Wav2Vec2ForCTC: ['quantizer.weight_proj.weight', 'quantizer.codevectors', 'quantizer.weight_proj.bias', 'project_q.bias', 'project_q.weight', 'project_hid.bias', 'project_hid.weight']
338
+ - This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
339
+ - This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
340
+ Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-300m and are newly initialized: ['lm_head.weight', 'lm_head.bias']
341
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
342
+
343
+
344
+
345
  0%| | 0/1 [00:00<?, ?ba/s]
346
+
347
  0%| | 0/1 [00:00<?, ?ba/s]
348
+ Configuration saved in ./preprocessor_config.json
349
+ tokenizer config file saved in ./tokenizer_config.json
350
+ Special tokens file saved in ./special_tokens_map.json
351
+ added tokens file saved in ./added_tokens.json
352
+ Configuration saved in ./config.json
353
+ loading feature extractor configuration file ./preprocessor_config.json
354
+ loading configuration file ./config.json
355
+ Model config Wav2Vec2Config {
356
+ "_name_or_path": "./",
357
+ "activation_dropout": 0.1,
358
+ "adapter_kernel_size": 3,
359
+ "adapter_stride": 2,
360
+ "add_adapter": false,
361
+ "apply_spec_augment": true,
362
+ "architectures": [
363
+ "Wav2Vec2ForPreTraining"
364
+ ],
365
+ "attention_dropout": 0.0,
366
+ "bos_token_id": 1,
367
+ "classifier_proj_size": 256,
368
+ "codevector_dim": 768,
369
+ "contrastive_logits_temperature": 0.1,
370
+ "conv_bias": true,
371
+ "conv_dim": [
372
+ 512,
373
+ 512,
374
+ 512,
375
+ 512,
376
+ 512,
377
+ 512,
378
+ 512
379
+ ],
380
+ "conv_kernel": [
381
+ 10,
382
+ 3,
383
+ 3,
384
+ 3,
385
+ 3,
386
+ 2,
387
+ 2
388
+ ],
389
+ "conv_stride": [
390
+ 5,
391
+ 2,
392
+ 2,
393
+ 2,
394
+ 2,
395
+ 2,
396
+ 2
397
+ ],
398
+ "ctc_loss_reduction": "mean",
399
+ "ctc_zero_infinity": false,
400
+ "diversity_loss_weight": 0.1,
401
+ "do_stable_layer_norm": true,
402
+ "eos_token_id": 2,
403
+ "feat_extract_activation": "gelu",
404
+ "feat_extract_dropout": 0.0,
405
+ "feat_extract_norm": "layer",
406
+ "feat_proj_dropout": 0.0,
407
+ "feat_quantizer_dropout": 0.0,
408
+ "final_dropout": 0.0,
409
+ "hidden_act": "gelu",
410
+ "hidden_dropout": 0.0,
411
+ "hidden_size": 1024,
412
+ "initializer_range": 0.02,
413
+ "intermediate_size": 4096,
414
+ "layer_norm_eps": 1e-05,
415
+ "layerdrop": 0.0,
416
+ "mask_feature_length": 64,
417
+ "mask_feature_min_masks": 0,
418
+ "mask_feature_prob": 0.25,
419
+ "mask_time_length": 10,
420
+ "mask_time_min_masks": 2,
421
+ "mask_time_prob": 0.75,
422
+ "model_type": "wav2vec2",
423
+ "num_adapter_layers": 3,
424
+ "num_attention_heads": 16,
425
+ "num_codevector_groups": 2,
426
+ "num_codevectors_per_group": 320,
427
+ "num_conv_pos_embedding_groups": 16,
428
+ "num_conv_pos_embeddings": 128,
429
+ "num_feat_extract_layers": 7,
430
+ "num_hidden_layers": 24,
431
+ "num_negatives": 100,
432
+ "output_hidden_size": 1024,
433
+ "pad_token_id": 58,
434
+ "proj_codevector_dim": 768,
435
+ "tdnn_dilation": [
436
+ 1,
437
+ 2,
438
+ 3,
439
+ 1,
440
+ 1
441
+ ],
442
+ "tdnn_dim": [
443
+ 512,
444
+ 512,
445
+ 512,
446
+ 512,
447
+ 1500
448
+ ],
449
+ "tdnn_kernel": [
450
+ 5,
451
+ 3,
452
+ 3,
453
+ 1,
454
+ 1
455
+ ],
456
+ "torch_dtype": "float32",
457
+ "transformers_version": "4.17.0.dev0",
458
+ "use_weighted_layer_sum": false,
459
+ "vocab_size": 61,
460
+ "xvector_output_dim": 512
461
+ }
462
+
463
+ loading feature extractor configuration file ./preprocessor_config.json
464
+ Feature extractor Wav2Vec2FeatureExtractor {
465
+ "do_normalize": true,
466
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
467
+ "feature_size": 1,
468
+ "padding_side": "right",
469
+ "padding_value": 0,
470
+ "return_attention_mask": true,
471
+ "sampling_rate": 16000
472
+ }
473
+
474
+ Didn't find file ./tokenizer.json. We won't load it.
475
+ loading file ./vocab.json
476
+ loading file ./tokenizer_config.json
477
+ loading file ./added_tokens.json
478
+ loading file ./special_tokens_map.json
479
+ loading file None
480
+ Adding <s> to the vocabulary
481
+ Adding </s> to the vocabulary
482
+ /workspace/xls-r-300m-ur/./ is already a clone of https://huggingface.co/HarrisDePerceptron/xls-r-300m-ur. Make sure you pull the latest changes with `repo.git_pull()`.
483
+ 02/02/2022 18:04:42 - WARNING - huggingface_hub.repository - /workspace/xls-r-300m-ur/./ is already a clone of https://huggingface.co/HarrisDePerceptron/xls-r-300m-ur. Make sure you pull the latest changes with `repo.git_pull()`.
484
+ Using amp half precision backend
485
+ The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.
486
+ /opt/conda/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning
487
+ warnings.warn(
488
+ ***** Running training *****
489
+ Num examples = 810
490
+ Num Epochs = 50
491
+ Instantaneous batch size per device = 8
492
+ Total train batch size (w. parallel, distributed & accumulation) = 32
493
+ Gradient Accumulation steps = 4
494
+ Total optimization steps = 1250
495
+
496
  0%| | 0/1250 [00:00<?, ?it/s]
497
  0%| | 1/1250 [00:02<53:39, 2.58s/it]
498
  0%| | 2/1250 [00:04<43:18, 2.08s/it]
499
  0%| | 3/1250 [00:05<37:42, 1.81s/it]
500
  0%| | 4/1250 [00:07<34:12, 1.65s/it]
501
  0%| | 5/1250 [00:08<31:07, 1.50s/it]
502
  0%| | 6/1250 [00:09<27:31, 1.33s/it]
503
  1%| | 7/1250 [00:11<35:11, 1.70s/it]
504
  1%| | 8/1250 [00:13<35:39, 1.72s/it]
505
  1%| | 9/1250 [00:15<34:52, 1.69s/it]
506
  1%| | 10/1250 [00:16<33:13, 1.61s/it]
507
  1%| | 11/1250 [00:17<30:26, 1.47s/it]
508
  1%| | 12/1250 [00:18<27:37, 1.34s/it]
509
  1%| | 13/1250 [00:21<34:42, 1.68s/it]
510
  1%| | 14/1250 [00:23<34:49, 1.69s/it]
511
  1%| | 15/1250 [00:24<34:04, 1.66s/it]
512
  1%|▏ | 16/1250 [00:26<32:22, 1.57s/it]
513
  1%|▏ | 17/1250 [00:27<29:57, 1.46s/it]
514
  1%|▏ | 18/1250 [00:28<26:57, 1.31s/it]
515
  2%|▏ | 19/1250 [00:30<32:14, 1.57s/it]
516
  2%|▏ | 20/1250 [00:32<32:52, 1.60s/it]
517
  2%|▏ | 21/1250 [00:33<32:10, 1.57s/it]
518
  2%|▏ | 22/1250 [00:34<30:44, 1.50s/it]
519
  2%|▏ | 23/1250 [00:36<28:38, 1.40s/it]
520
  2%|▏ | 24/1250 [00:37<26:12, 1.28s/it]
521
  2%|▏ | 25/1250 [00:38<28:56, 1.42s/it]
522
  2%|▏ | 26/1250 [00:41<38:28, 1.89s/it]
523
  2%|▏ | 27/1250 [00:43<37:43, 1.85s/it]
524
  2%|▏ | 28/1250 [00:45<35:59, 1.77s/it]
525
  2%|▏ | 29/1250 [00:46<33:21, 1.64s/it]
526
  2%|▏ | 30/1250 [00:47<30:47, 1.51s/it]
527
  2%|▏ | 31/1250 [00:48<27:31, 1.36s/it]
528
  3%|▎ | 32/1250 [00:51<33:43, 1.66s/it]
529
  3%|▎ | 33/1250 [00:52<34:25, 1.70s/it]
530
  3%|▎ | 34/1250 [00:54<33:34, 1.66s/it]
531
  3%|▎ | 35/1250 [00:55<31:55, 1.58s/it]
532
  3%|▎ | 36/1250 [00:56<29:40, 1.47s/it]
533
  3%|▎ | 37/1250 [00:58<26:54, 1.33s/it]
534
  3%|▎ | 38/1250 [01:00<33:02, 1.64s/it]
535
  3%|▎ | 39/1250 [01:02<33:43, 1.67s/it]
536
  3%|▎ | 40/1250 [01:03<32:32, 1.61s/it]
537
  3%|▎ | 41/1250 [01:04<30:50, 1.53s/it]
538
  3%|▎ | 42/1250 [01:06<28:27, 1.41s/it]
539
  3%|▎ | 43/1250 [01:07<25:45, 1.28s/it]
540
  4%|▎ | 44/1250 [01:09<31:31, 1.57s/it]
541
  4%|▎ | 45/1250 [01:10<32:20, 1.61s/it]
542
  4%|▎ | 46/1250 [01:12<31:58, 1.59s/it]
543
  4%|▍ | 47/1250 [01:13<30:42, 1.53s/it]
544
  4%|▍ | 48/1250 [01:15<28:46, 1.44s/it]
545
  4%|▍ | 49/1250 [01:16<26:22, 1.32s/it]
546
  4%|▍ | 50/1250 [01:17<29:12, 1.46s/it]
547
  4%|▍ | 51/1250 [01:20<37:20, 1.87s/it]
548
  4%|▍ | 52/1250 [01:22<36:38, 1.83s/it]
549
  4%|▍ | 53/1250 [01:24<35:02, 1.76s/it]
550
  4%|▍ | 54/1250 [01:25<32:55, 1.65s/it]
551
  4%|▍ | 55/1250 [01:26<30:10, 1.51s/it]
552
  4%|▍ | 56/1250 [01:27<26:59, 1.36s/it]
553
  5%|▍ | 57/1250 [01:29<32:08, 1.62s/it]
554
  5%|▍ | 58/1250 [01:31<32:44, 1.65s/it]
555
  5%|▍ | 59/1250 [01:33<31:55, 1.61s/it]
556
  5%|▍ | 60/1250 [01:34<30:25, 1.53s/it]
557
  5%|▍ | 61/1250 [01:35<28:26, 1.44s/it]
558
  5%|▍ | 62/1250 [01:36<26:14, 1.33s/it]
559
  5%|▌ | 63/1250 [01:39<32:19, 1.63s/it]
560
  5%|▌ | 64/1250 [01:40<33:12, 1.68s/it]
561
  5%|▌ | 65/1250 [01:42<32:10, 1.63s/it]
562
  5%|▌ | 66/1250 [01:43<30:43, 1.56s/it]
563
  5%|▌ | 67/1250 [01:45<28:35, 1.45s/it]
564
  5%|▌ | 68/1250 [01:46<25:57, 1.32s/it]
565
  6%|▌ | 69/1250 [01:48<33:24, 1.70s/it]
566
  6%|▌ | 70/1250 [01:50<34:10, 1.74s/it]
567
  6%|▌ | 71/1250 [01:51<32:41, 1.66s/it]
568
  6%|▌ | 72/1250 [01:53<30:43, 1.57s/it]
569
  6%|▌ | 73/1250 [01:54<28:32, 1.46s/it]
570
  6%|▌ | 74/1250 [01:55<25:40, 1.31s/it]
571
  6%|▌ | 75/1250 [01:57<28:23, 1.45s/it]
572
  6%|▌ | 76/1250 [02:00<37:02, 1.89s/it]
573
  6%|▌ | 77/1250 [02:01<36:23, 1.86s/it]
574
  6%|▌ | 78/1250 [02:03<34:41, 1.78s/it]
575
  6%|▋ | 79/1250 [02:04<32:21, 1.66s/it]
576
  6%|▋ | 80/1250 [02:06<29:42, 1.52s/it]
577
  6%|▋ | 81/1250 [02:07<26:30, 1.36s/it]
578
  7%|▋ | 82/1250 [02:09<32:23, 1.66s/it]
579
  7%|▋ | 83/1250 [02:11<32:49, 1.69s/it]
580
  7%|▋ | 84/1250 [02:12<31:56, 1.64s/it]
581
  7%|▋ | 85/1250 [02:14<30:09, 1.55s/it]
582
  7%|▋ | 86/1250 [02:15<27:51, 1.44s/it]
583
  7%|▋ | 87/1250 [02:16<25:16, 1.30s/it]
584
  7%|▋ | 88/1250 [02:18<32:07, 1.66s/it]
585
  7%|▋ | 89/1250 [02:20<32:48, 1.70s/it]
586
  7%|▋ | 90/1250 [02:22<31:43, 1.64s/it]
587
  7%|▋ | 91/1250 [02:23<30:21, 1.57s/it]
588
  7%|▋ | 92/1250 [02:24<28:26, 1.47s/it]
589
  7%|▋ | 93/1250 [02:25<26:29, 1.37s/it]
590
  8%|▊ | 94/1250 [02:28<31:23, 1.63s/it]
591
  8%|▊ | 95/1250 [02:29<31:51, 1.66s/it]
592
  8%|▊ | 96/1250 [02:31<30:50, 1.60s/it]
593
  8%|▊ | 97/1250 [02:32<29:30, 1.54s/it]
594
  8%|▊ | 98/1250 [02:33<27:25, 1.43s/it]
595
  8%|▊ | 99/1250 [02:34<24:44, 1.29s/it]
596
  8%|▊ | 100/1250 [02:36<26:59, 1.41s/it]
597
 
598
  8%|▊ | 100/1250 [02:36<26:59, 1.41s/it]
599
  8%|▊ | 101/1250 [02:39<35:08, 1.84s/it]
600
  8%|▊ | 102/1250 [02:41<34:28, 1.80s/it]
601
  8%|▊ | 103/1250 [02:42<32:30, 1.70s/it]
602
  8%|▊ | 104/1250 [02:43<30:05, 1.58s/it]
603
  8%|▊ | 105/1250 [02:44<27:37, 1.45s/it]
604
  8%|▊ | 106/1250 [02:45<24:44, 1.30s/it]
605
  9%|▊ | 107/1250 [02:48<30:38, 1.61s/it]
606
  9%|▊ | 108/1250 [02:49<31:24, 1.65s/it]
607
  9%|▊ | 109/1250 [02:51<30:45, 1.62s/it]
608
  9%|▉ | 110/1250 [02:52<29:22, 1.55s/it]
609
  9%|▉ | 111/1250 [02:54<27:20, 1.44s/it]
610
  9%|▉ | 112/1250 [02:55<24:41, 1.30s/it]
611
  9%|▉ | 113/1250 [02:57<29:34, 1.56s/it]
612
  9%|▉ | 114/1250 [02:58<30:11, 1.59s/it]
613
  9%|▉ | 115/1250 [03:00<29:50, 1.58s/it]
614
  9%|▉ | 116/1250 [03:01<28:49, 1.53s/it]
615
  9%|▉ | 117/1250 [03:03<26:52, 1.42s/it]
616
  9%|▉ | 118/1250 [03:04<24:25, 1.29s/it]
617
  10%|▉ | 119/1250 [03:06<30:27, 1.62s/it]
618
  10%|▉ | 120/1250 [03:08<31:40, 1.68s/it]
619
  10%|▉ | 121/1250 [03:09<30:52, 1.64s/it]
620
  10%|▉ | 122/1250 [03:11<29:20, 1.56s/it]
621
  10%|▉ | 123/1250 [03:12<27:11, 1.45s/it]
622
  10%|▉ | 124/1250 [03:13<24:31, 1.31s/it]
623
  10%|█ | 125/1250 [03:15<28:39, 1.53s/it]
624
  10%|█ | 126/1250 [03:18<36:45, 1.96s/it]
625
  10%|█ | 127/1250 [03:20<35:48, 1.91s/it]
626
  10%|█ | 128/1250 [03:21<33:48, 1.81s/it]
627
  10%|█ | 129/1250 [03:23<31:32, 1.69s/it]
628
  10%|█ | 130/1250 [03:24<28:58, 1.55s/it]
629
  10%|█ | 131/1250 [03:25<25:53, 1.39s/it]
630
  11%|█ | 132/1250 [03:27<31:02, 1.67s/it]
631
  11%|█ | 133/1250 [03:29<31:42, 1.70s/it]
632
  11%|█ | 134/1250 [03:30<30:44, 1.65s/it]
633
  11%|█ | 135/1250 [03:32<29:06, 1.57s/it]
634
  11%|█ | 136/1250 [03:33<26:59, 1.45s/it]
635
  11%|█ | 137/1250 [03:34<24:33, 1.32s/it]
636
  11%|█ | 138/1250 [03:36<30:43, 1.66s/it]
637
  11%|█ | 139/1250 [03:38<31:10, 1.68s/it]
638
  11%|█ | 140/1250 [03:40<30:34, 1.65s/it]
639
  11%|█▏ | 141/1250 [03:41<29:08, 1.58s/it]
640
  11%|█▏ | 142/1250 [03:42<26:41, 1.45s/it]
641
  11%|█▏ | 143/1250 [03:43<24:14, 1.31s/it]
642
  12%|█▏ | 144/1250 [03:46<29:19, 1.59s/it]
643
  12%|█▏ | 145/1250 [03:47<29:55, 1.62s/it]
644
  12%|█▏ | 146/1250 [03:49<29:20, 1.59s/it]
645
  12%|█▏ | 147/1250 [03:50<28:09, 1.53s/it]
646
  12%|█▏ | 148/1250 [03:51<26:42, 1.45s/it]
647
  12%|█▏ | 149/1250 [03:52<24:23, 1.33s/it]
648
  12%|█▏ | 150/1250 [03:54<27:24, 1.49s/it]
649
  12%|█▏ | 151/1250 [03:57<34:21, 1.88s/it]
650
  12%|█▏ | 152/1250 [03:59<33:23, 1.82s/it]
651
  12%|█▏ | 153/1250 [04:00<31:32, 1.72s/it]
652
  12%|█▏ | 154/1250 [04:02<29:24, 1.61s/it]
653
  12%|█▏ | 155/1250 [04:03<27:13, 1.49s/it]
654
  12%|█▏ | 156/1250 [04:04<24:24, 1.34s/it]
655
  13%|█▎ | 157/1250 [04:06<30:38, 1.68s/it]
656
  13%|█▎ | 158/1250 [04:08<30:45, 1.69s/it]
657
  13%|█▎ | 159/1250 [04:10<30:02, 1.65s/it]
658
  13%|█▎ | 160/1250 [04:11<28:40, 1.58s/it]
659
  13%|█▎ | 161/1250 [04:12<26:29, 1.46s/it]
660
  13%|█▎ | 162/1250 [04:13<23:57, 1.32s/it]
661
  13%|█▎ | 163/1250 [04:16<29:54, 1.65s/it]
662
  13%|█▎ | 164/1250 [04:17<30:57, 1.71s/it]
663
  13%|█▎ | 165/1250 [04:19<29:58, 1.66s/it]
664
  13%|█▎ | 166/1250 [04:20<28:18, 1.57s/it]
665
  13%|█▎ | 167/1250 [04:22<26:21, 1.46s/it]
666
  13%|█▎ | 168/1250 [04:23<23:44, 1.32s/it]
667
  14%|█▎ | 169/1250 [04:25<29:27, 1.63s/it]
668
  14%|█▎ | 170/1250 [04:27<30:05, 1.67s/it]
669
  14%|█▎ | 171/1250 [04:28<29:08, 1.62s/it]
670
  14%|█▍ | 172/1250 [04:30<27:20, 1.52s/it]
671
  14%|█▍ | 173/1250 [04:31<25:27, 1.42s/it]
672
  14%|█▍ | 174/1250 [04:32<22:57, 1.28s/it]
673
  14%|█▍ | 175/1250 [04:33<25:14, 1.41s/it]
674
  14%|█▍ | 176/1250 [04:36<32:49, 1.83s/it]
675
  14%|█▍ | 177/1250 [04:38<32:19, 1.81s/it]
676
  14%|█▍ | 178/1250 [04:39<30:54, 1.73s/it]
677
  14%|█▍ | 179/1250 [04:41<29:09, 1.63s/it]
678
  14%|█▍ | 180/1250 [04:42<26:40, 1.50s/it]
679
  14%|█▍ | 181/1250 [04:43<23:54, 1.34s/it]
680
  15%|█▍ | 182/1250 [04:45<28:58, 1.63s/it]
681
  15%|█▍ | 183/1250 [04:47<29:22, 1.65s/it]
682
  15%|█▍ | 184/1250 [04:49<28:39, 1.61s/it]
683
  15%|█▍ | 185/1250 [04:50<27:09, 1.53s/it]
684
  15%|█▍ | 186/1250 [04:51<25:10, 1.42s/it]
685
  15%|█▍ | 187/1250 [04:52<22:49, 1.29s/it]
686
  15%|█▌ | 188/1250 [04:55<29:02, 1.64s/it]
687
  15%|█▌ | 189/1250 [04:56<29:43, 1.68s/it]
688
  15%|█▌ | 190/1250 [04:58<28:55, 1.64s/it]
689
  15%|█▌ | 191/1250 [04:59<27:14, 1.54s/it]
690
  15%|█▌ | 192/1250 [05:00<25:07, 1.43s/it]
691
  15%|█▌ | 193/1250 [05:01<22:42, 1.29s/it]
692
  16%|█▌ | 194/1250 [05:04<27:52, 1.58s/it]
693
  16%|█▌ | 195/1250 [05:05<28:41, 1.63s/it]
694
  16%|█▌ | 196/1250 [05:07<28:06, 1.60s/it]
695
  16%|█▌ | 197/1250 [05:08<27:08, 1.55s/it]
696
  16%|█▌ | 198/1250 [05:09<25:40, 1.46s/it]
697
  16%|█▌ | 199/1250 [05:11<23:23, 1.34s/it]
698
  16%|█▌ | 200/1250 [05:12<26:23, 1.51s/it]
699
 
700
  16%|█▌ | 200/1250 [05:12<26:23, 1.51s/it]
701
  16%|█▌ | 201/1250 [05:15<33:33, 1.92s/it]
702
  16%|█▌ | 202/1250 [05:17<32:28, 1.86s/it]
703
  16%|█▌ | 203/1250 [05:19<30:40, 1.76s/it]
704
  16%|█▋ | 204/1250 [05:20<28:51, 1.66s/it]
705
  16%|█▋ | 205/1250 [05:21<26:31, 1.52s/it]
706
  16%|█▋ | 206/1250 [05:22<23:42, 1.36s/it]
707
  17%|█▋ | 207/1250 [05:25<29:02, 1.67s/it]
708
  17%|█▋ | 208/1250 [05:26<29:31, 1.70s/it]
709
  17%|█▋ | 209/1250 [05:28<28:26, 1.64s/it]
710
  17%|█▋ | 210/1250 [05:29<26:54, 1.55s/it]
711
  17%|█▋ | 211/1250 [05:30<24:56, 1.44s/it]
712
  17%|█▋ | 212/1250 [05:31<22:33, 1.30s/it]
713
  17%|█▋ | 213/1250 [05:34<28:02, 1.62s/it]
714
  17%|█▋ | 214/1250 [05:35<28:39, 1.66s/it]
715
  17%|█▋ | 215/1250 [05:37<27:54, 1.62s/it]
716
  17%|█▋ | 216/1250 [05:38<26:40, 1.55s/it]
717
  17%|█▋ | 217/1250 [05:40<24:52, 1.44s/it]
718
  17%|█▋ | 218/1250 [05:41<22:36, 1.31s/it]
719
  18%|█▊ | 219/1250 [05:43<27:51, 1.62s/it]
720
  18%|█▊ | 220/1250 [05:45<28:23, 1.65s/it]
721
  18%|█▊ | 221/1250 [05:46<27:36, 1.61s/it]
722
  18%|█▊ | 222/1250 [05:47<26:09, 1.53s/it]
723
  18%|█▊ | 223/1250 [05:49<24:20, 1.42s/it]
724
  18%|█▊ | 224/1250 [05:50<22:20, 1.31s/it]
725
  18%|█▊ | 225/1250 [05:52<25:04, 1.47s/it]
726
  18%|█▊ | 226/1250 [05:54<32:16, 1.89s/it]
727
  18%|█▊ | 227/1250 [05:56<31:41, 1.86s/it]
728
  18%|█▊ | 228/1250 [05:58<30:00, 1.76s/it]
729
  18%|█▊ | 229/1250 [05:59<27:50, 1.64s/it]
730
  18%|█▊ | 230/1250 [06:00<25:26, 1.50s/it]
731
  18%|█▊ | 231/1250 [06:01<22:49, 1.34s/it]
732
  19%|█▊ | 232/1250 [06:04<27:42, 1.63s/it]
733
  19%|█▊ | 233/1250 [06:05<28:19, 1.67s/it]
734
  19%|█▊ | 234/1250 [06:07<27:47, 1.64s/it]
735
  19%|█▉ | 235/1250 [06:08<26:41, 1.58s/it]
736
  19%|█▉ | 236/1250 [06:10<25:01, 1.48s/it]
737
  19%|█▉ | 237/1250 [06:11<22:57, 1.36s/it]
738
  19%|█▉ | 238/1250 [06:13<28:17, 1.68s/it]
739
  19%|█▉ | 239/1250 [06:15<28:30, 1.69s/it]
740
  19%|█▉ | 240/1250 [06:16<27:31, 1.64s/it]
741
  19%|█▉ | 241/1250 [06:18<25:47, 1.53s/it]
742
  19%|█▉ | 242/1250 [06:19<23:55, 1.42s/it]
743
  19%|█▉ | 243/1250 [06:20<21:42, 1.29s/it]
744
  20%|█▉ | 244/1250 [06:22<27:29, 1.64s/it]
745
  20%|█▉ | 245/1250 [06:24<27:55, 1.67s/it]
746
  20%|█▉ | 246/1250 [06:25<27:02, 1.62s/it]
747
  20%|█▉ | 247/1250 [06:27<25:41, 1.54s/it]
748
  20%|█▉ | 248/1250 [06:28<23:48, 1.43s/it]
749
  20%|█▉ | 249/1250 [06:29<21:24, 1.28s/it]
750
  20%|██ | 250/1250 [06:31<23:19, 1.40s/it]
751
  20%|██ | 251/1250 [06:34<31:22, 1.88s/it]
752
  20%|██ | 252/1250 [06:35<30:47, 1.85s/it]
753
  20%|██ | 253/1250 [06:37<28:53, 1.74s/it]
754
  20%|██ | 254/1250 [06:38<26:57, 1.62s/it]
755
  20%|██ | 255/1250 [06:39<24:37, 1.48s/it]
756
  20%|██ | 256/1250 [06:40<22:05, 1.33s/it]
757
  21%|██ | 257/1250 [06:43<27:33, 1.66s/it]
758
  21%|██ | 258/1250 [06:45<28:01, 1.69s/it]
759
  21%|██ | 259/1250 [06:46<27:31, 1.67s/it]
760
  21%|██ | 260/1250 [06:48<26:14, 1.59s/it]
761
  21%|██ | 261/1250 [06:49<24:25, 1.48s/it]
762
  21%|██ | 262/1250 [06:50<21:49, 1.33s/it]
763
  21%|██ | 263/1250 [06:52<26:46, 1.63s/it]
764
  21%|██ | 264/1250 [06:54<27:14, 1.66s/it]
765
  21%|██ | 265/1250 [06:55<26:29, 1.61s/it]
766
  21%|██▏ | 266/1250 [06:57<25:03, 1.53s/it]
767
  21%|██▏ | 267/1250 [06:58<23:19, 1.42s/it]
768
  21%|██▏ | 268/1250 [06:59<21:12, 1.30s/it]
769
  22%|██▏ | 269/1250 [07:01<25:45, 1.58s/it]
770
  22%|██▏ | 270/1250 [07:03<26:41, 1.63s/it]
771
  22%|██▏ | 271/1250 [07:04<26:09, 1.60s/it]
772
  22%|██▏ | 272/1250 [07:06<24:53, 1.53s/it]
773
  22%|██▏ | 273/1250 [07:07<23:15, 1.43s/it]
774
  22%|██▏ | 274/1250 [07:08<21:06, 1.30s/it]
775
  22%|██▏ | 275/1250 [07:10<22:48, 1.40s/it]
776
  22%|██▏ | 276/1250 [07:12<29:24, 1.81s/it]
777
  22%|██▏ | 277/1250 [07:14<29:19, 1.81s/it]
778
  22%|██▏ | 278/1250 [07:16<27:58, 1.73s/it]
779
  22%|██▏ | 279/1250 [07:17<26:26, 1.63s/it]
780
  22%|██▏ | 280/1250 [07:18<24:16, 1.50s/it]
781
  22%|██▏ | 281/1250 [07:19<21:57, 1.36s/it]
782
  23%|██▎ | 282/1250 [07:22<26:51, 1.66s/it]
783
  23%|██▎ | 283/1250 [07:23<27:18, 1.69s/it]
784
  23%|██▎ | 284/1250 [07:25<26:33, 1.65s/it]
785
  23%|██▎ | 285/1250 [07:26<25:13, 1.57s/it]
786
  23%|██▎ | 286/1250 [07:28<23:27, 1.46s/it]
787
  23%|██▎ | 287/1250 [07:29<21:21, 1.33s/it]
788
  23%|██▎ | 288/1250 [07:31<26:40, 1.66s/it]
789
  23%|██▎ | 289/1250 [07:33<26:49, 1.68s/it]
790
  23%|██▎ | 290/1250 [07:34<26:15, 1.64s/it]
791
  23%|██▎ | 291/1250 [07:36<24:52, 1.56s/it]
792
  23%|██▎ | 292/1250 [07:37<22:56, 1.44s/it]
793
  23%|██▎ | 293/1250 [07:38<20:36, 1.29s/it]
794
  24%|██▎ | 294/1250 [07:40<25:20, 1.59s/it]
795
  24%|██▎ | 295/1250 [07:42<25:45, 1.62s/it]
796
  24%|██▎ | 296/1250 [07:43<24:59, 1.57s/it]
797
  24%|██▍ | 297/1250 [07:44<23:42, 1.49s/it]
798
  24%|██▍ | 298/1250 [07:46<22:08, 1.40s/it]
799
  24%|██▍ | 299/1250 [07:47<20:04, 1.27s/it]
800
  24%|██▍ | 300/1250 [07:48<22:58, 1.45s/it]
801
 
802
  24%|██▍ | 300/1250 [07:48<22:58, 1.45s/it]
803
  24%|██▍ | 301/1250 [07:51<29:17, 1.85s/it]
804
  24%|██▍ | 302/1250 [07:53<28:36, 1.81s/it]
805
  24%|██▍ | 303/1250 [07:55<27:16, 1.73s/it]
806
  24%|██▍ | 304/1250 [07:56<25:46, 1.63s/it]
807
  24%|██▍ | 305/1250 [07:57<23:38, 1.50s/it]
808
  24%|██▍ | 306/1250 [07:58<21:16, 1.35s/it]
809
  25%|██▍ | 307/1250 [08:00<25:45, 1.64s/it]
810
  25%|██▍ | 308/1250 [08:02<26:24, 1.68s/it]
811
  25%|██▍ | 309/1250 [08:04<25:52, 1.65s/it]
812
  25%|██▍ | 310/1250 [08:05<24:30, 1.56s/it]
813
  25%|██▍ | 311/1250 [08:06<22:52, 1.46s/it]
814
  25%|██▍ | 312/1250 [08:07<20:47, 1.33s/it]
815
  25%|██▌ | 313/1250 [08:10<26:16, 1.68s/it]
816
  25%|██▌ | 314/1250 [08:12<26:45, 1.72s/it]
817
  25%|██▌ | 315/1250 [08:13<25:53, 1.66s/it]
818
  25%|██▌ | 316/1250 [08:15<24:24, 1.57s/it]
819
  25%|██▌ | 317/1250 [08:16<22:36, 1.45s/it]
820
  25%|█��▌ | 318/1250 [08:17<20:24, 1.31s/it]
821
  26%|██▌ | 319/1250 [08:19<25:10, 1.62s/it]
822
  26%|██▌ | 320/1250 [08:21<25:30, 1.65s/it]
823
  26%|██▌ | 321/1250 [08:22<24:42, 1.60s/it]
824
  26%|██▌ | 322/1250 [08:24<23:36, 1.53s/it]
825
  26%|██▌ | 323/1250 [08:25<21:59, 1.42s/it]
826
  26%|██▌ | 324/1250 [08:26<19:52, 1.29s/it]
827
  26%|██▌ | 325/1250 [08:28<22:51, 1.48s/it]
828
  26%|██▌ | 326/1250 [08:31<28:59, 1.88s/it]
829
  26%|██▌ | 327/1250 [08:32<28:15, 1.84s/it]
830
  26%|██▌ | 328/1250 [08:34<26:38, 1.73s/it]
831
  26%|██▋ | 329/1250 [08:35<24:51, 1.62s/it]
832
  26%|██▋ | 330/1250 [08:36<22:40, 1.48s/it]
833
  26%|██▋ | 331/1250 [08:37<20:21, 1.33s/it]
834
  27%|██▋ | 332/1250 [08:40<24:40, 1.61s/it]
835
  27%|██▋ | 333/1250 [08:41<25:06, 1.64s/it]
836
  27%|██▋ | 334/1250 [08:43<24:38, 1.61s/it]
837
  27%|██▋ | 335/1250 [08:44<23:33, 1.54s/it]
838
  27%|██▋ | 336/1250 [08:45<21:48, 1.43s/it]
839
  27%|██▋ | 337/1250 [08:46<19:37, 1.29s/it]
840
  27%|██▋ | 338/1250 [08:49<24:31, 1.61s/it]
841
  27%|██▋ | 339/1250 [08:50<25:13, 1.66s/it]
842
  27%|██▋ | 340/1250 [08:52<24:29, 1.61s/it]
843
  27%|██▋ | 341/1250 [08:53<23:28, 1.55s/it]
844
  27%|██▋ | 342/1250 [08:55<21:50, 1.44s/it]
845
  27%|██▋ | 343/1250 [08:56<20:01, 1.32s/it]
846
  28%|██▊ | 344/1250 [08:58<24:53, 1.65s/it]
847
  28%|██▊ | 345/1250 [09:00<25:11, 1.67s/it]
848
  28%|██▊ | 346/1250 [09:01<24:32, 1.63s/it]
849
  28%|██▊ | 347/1250 [09:03<23:18, 1.55s/it]
850
  28%|██▊ | 348/1250 [09:04<21:27, 1.43s/it]
851
  28%|██▊ | 349/1250 [09:05<19:20, 1.29s/it]
852
  28%|██▊ | 350/1250 [09:07<22:39, 1.51s/it]
853
  28%|██▊ | 351/1250 [09:10<29:06, 1.94s/it]
854
  28%|██▊ | 352/1250 [09:12<28:24, 1.90s/it]
855
  28%|██▊ | 353/1250 [09:13<26:48, 1.79s/it]
856
  28%|██▊ | 354/1250 [09:14<24:54, 1.67s/it]
857
  28%|██▊ | 355/1250 [09:16<22:40, 1.52s/it]
858
  28%|██▊ | 356/1250 [09:17<20:29, 1.38s/it]
859
  29%|██▊ | 357/1250 [09:19<24:22, 1.64s/it]
860
  29%|██▊ | 358/1250 [09:21<24:35, 1.65s/it]
861
  29%|██▊ | 359/1250 [09:22<23:54, 1.61s/it]
862
  29%|██▉ | 360/1250 [09:23<22:44, 1.53s/it]
863
  29%|██▉ | 361/1250 [09:25<21:14, 1.43s/it]
864
  29%|██▉ | 362/1250 [09:26<19:13, 1.30s/it]
865
  29%|██▉ | 363/1250 [09:28<23:56, 1.62s/it]
866
  29%|██▉ | 364/1250 [09:30<24:15, 1.64s/it]
867
  29%|██▉ | 365/1250 [09:31<23:39, 1.60s/it]
868
  29%|██▉ | 366/1250 [09:33<22:30, 1.53s/it]
869
  29%|██▉ | 367/1250 [09:34<21:11, 1.44s/it]
870
  29%|██▉ | 368/1250 [09:35<19:05, 1.30s/it]
871
  30%|██▉ | 369/1250 [09:37<24:00, 1.63s/it]
872
  30%|██▉ | 370/1250 [09:39<24:30, 1.67s/it]
873
  30%|██▉ | 371/1250 [09:40<23:53, 1.63s/it]
874
  30%|██▉ | 372/1250 [09:42<22:41, 1.55s/it]
875
  30%|██▉ | 373/1250 [09:43<20:58, 1.44s/it]
876
  30%|██▉ | 374/1250 [09:44<18:54, 1.30s/it]
877
  30%|███ | 375/1250 [09:46<20:17, 1.39s/it]
878
  30%|███ | 376/1250 [09:48<26:28, 1.82s/it]
879
  30%|███ | 377/1250 [09:50<25:49, 1.78s/it]
880
  30%|███ | 378/1250 [09:52<24:25, 1.68s/it]
881
  30%|███ | 379/1250 [09:53<22:51, 1.57s/it]
882
  30%|███ | 380/1250 [09:54<21:04, 1.45s/it]
883
  30%|███ | 381/1250 [09:55<18:55, 1.31s/it]
884
  31%|███ | 382/1250 [09:57<23:55, 1.65s/it]
885
  31%|███ | 383/1250 [09:59<24:26, 1.69s/it]
886
  31%|███ | 384/1250 [10:01<23:44, 1.64s/it]
887
  31%|███ | 385/1250 [10:02<22:36, 1.57s/it]
888
  31%|███ | 386/1250 [10:03<21:00, 1.46s/it]
889
  31%|███ | 387/1250 [10:04<19:05, 1.33s/it]
890
  31%|███ | 388/1250 [10:07<22:40, 1.58s/it]
891
  31%|███ | 389/1250 [10:08<23:05, 1.61s/it]
892
  31%|███ | 390/1250 [10:10<22:56, 1.60s/it]
893
  31%|███▏ | 391/1250 [10:11<22:10, 1.55s/it]
894
  31%|███▏ | 392/1250 [10:12<20:33, 1.44s/it]
895
  31%|███▏ | 393/1250 [10:13<18:37, 1.30s/it]
896
  32%|███▏ | 394/1250 [10:16<23:25, 1.64s/it]
897
  32%|███▏ | 395/1250 [10:18<23:45, 1.67s/it]
898
  32%|███▏ | 396/1250 [10:19<23:08, 1.63s/it]
899
  32%|███▏ | 397/1250 [10:20<21:55, 1.54s/it]
900
  32%|███▏ | 398/1250 [10:22<20:16, 1.43s/it]
901
  32%|███▏ | 399/1250 [10:23<18:21, 1.29s/it]
902
  32%|███▏ | 400/1250 [10:24<20:34, 1.45s/it]
903
 
904
  32%|███▏ | 400/1250 [10:24<20:34, 1.45s/it]
905
  32%|███▏ | 401/1250 [10:27<26:02, 1.84s/it]
906
  32%|███▏ | 402/1250 [10:29<25:43, 1.82s/it]
907
  32%|███▏ | 403/1250 [10:31<24:36, 1.74s/it]
908
  32%|███▏ | 404/1250 [10:32<22:58, 1.63s/it]
909
  32%|███▏ | 405/1250 [10:33<21:12, 1.51s/it]
910
  32%|███▏ | 406/1250 [10:34<19:08, 1.36s/it]
911
  33%|███▎ | 407/1250 [10:37<23:55, 1.70s/it]
912
  33%|███▎ | 408/1250 [10:38<24:11, 1.72s/it]
913
  33%|███▎ | 409/1250 [10:40<23:27, 1.67s/it]
914
  33%|███▎ | 410/1250 [10:41<22:04, 1.58s/it]
915
  33%|███▎ | 411/1250 [10:42<20:25, 1.46s/it]
916
  33%|███▎ | 412/1250 [10:43<18:26, 1.32s/it]
917
  33%|███▎ | 413/1250 [10:46<22:59, 1.65s/it]
918
  33%|███▎ | 414/1250 [10:48<23:20, 1.68s/it]
919
  33%|███▎ | 415/1250 [10:49<22:33, 1.62s/it]
920
  33%|███▎ | 416/1250 [10:50<21:15, 1.53s/it]
921
  33%|███▎ | 417/1250 [10:52<19:25, 1.40s/it]
922
  33%|███▎ | 418/1250 [10:52<17:26, 1.26s/it]
923
  34%|███▎ | 419/1250 [10:55<21:48, 1.57s/it]
924
  34%|███▎ | 420/1250 [10:56<22:18, 1.61s/it]
925
  34%|███▎ | 421/1250 [10:58<21:51, 1.58s/it]
926
  34%|███▍ | 422/1250 [10:59<21:12, 1.54s/it]
927
  34%|███▍ | 423/1250 [11:01<19:59, 1.45s/it]
928
  34%|███▍ | 424/1250 [11:02<18:08, 1.32s/it]
929
  34%|███▍ | 425/1250 [11:03<19:17, 1.40s/it]
930
  34%|███▍ | 426/1250 [11:06<25:20, 1.85s/it]
931
  34%|███▍ | 427/1250 [11:08<24:54, 1.82s/it]
932
  34%|███▍ | 428/1250 [11:09<23:45, 1.73s/it]
933
  34%|███▍ | 429/1250 [11:11<22:22, 1.64s/it]
934
  34%|███▍ | 430/1250 [11:12<20:29, 1.50s/it]
935
  34%|███▍ | 431/1250 [11:13<18:27, 1.35s/it]
936
  35%|███▍ | 432/1250 [11:15<21:54, 1.61s/it]
937
  35%|███▍ | 433/1250 [11:17<22:24, 1.65s/it]
938
  35%|███▍ | 434/1250 [11:18<21:43, 1.60s/it]
939
  35%|███▍ | 435/1250 [11:20<20:37, 1.52s/it]
940
  35%|███▍ | 436/1250 [11:21<19:27, 1.43s/it]
941
  35%|███▍ | 437/1250 [11:22<17:33, 1.30s/it]
942
  35%|███▌ | 438/1250 [11:24<21:45, 1.61s/it]
943
  35%|███▌ | 439/1250 [11:26<22:22, 1.65s/it]
944
  35%|███▌ | 440/1250 [11:28<21:56, 1.62s/it]
945
  35%|███▌ | 441/1250 [11:29<21:01, 1.56s/it]
946
  35%|███▌ | 442/1250 [11:30<19:33, 1.45s/it]
947
  35%|███▌ | 443/1250 [11:31<17:42, 1.32s/it]
948
  36%|███▌ | 444/1250 [11:34<22:11, 1.65s/it]
949
  36%|███▌ | 445/1250 [11:35<22:31, 1.68s/it]
950
  36%|███▌ | 446/1250 [11:37<22:06, 1.65s/it]
951
  36%|███▌ | 447/1250 [11:38<20:52, 1.56s/it]
952
  36%|███▌ | 448/1250 [11:40<19:07, 1.43s/it]
953
  36%|███▌ | 449/1250 [11:40<17:11, 1.29s/it]
954
  36%|███▌ | 450/1250 [11:42<18:49, 1.41s/it]
955
  36%|███▌ | 451/1250 [11:45<24:40, 1.85s/it]
956
  36%|███▌ | 452/1250 [11:47<24:24, 1.84s/it]
957
  36%|███▌ | 453/1250 [11:48<23:15, 1.75s/it]
958
  36%|███▋ | 454/1250 [11:50<21:50, 1.65s/it]
959
  36%|███▋ | 455/1250 [11:51<20:05, 1.52s/it]
960
  36%|███▋ | 456/1250 [11:52<17:55, 1.35s/it]
961
  37%|███▋ | 457/1250 [11:54<22:22, 1.69s/it]
962
  37%|███▋ | 458/1250 [11:56<22:42, 1.72s/it]
963
  37%|███▋ | 459/1250 [11:58<21:46, 1.65s/it]
964
  37%|███▋ | 460/1250 [11:59<20:25, 1.55s/it]
965
  37%|███▋ | 461/1250 [12:00<18:57, 1.44s/it]
966
  37%|███▋ | 462/1250 [12:01<17:05, 1.30s/it]
967
  37%|███▋ | 463/1250 [12:03<20:37, 1.57s/it]
968
  37%|███▋ | 464/1250 [12:05<21:05, 1.61s/it]
969
  37%|███▋ | 465/1250 [12:07<20:35, 1.57s/it]
970
  37%|███▋ | 466/1250 [12:08<19:39, 1.50s/it]
971
  37%|███▋ | 467/1250 [12:09<18:22, 1.41s/it]
972
  37%|███▋ | 468/1250 [12:10<16:57, 1.30s/it]
973
  38%|███▊ | 469/1250 [12:12<20:46, 1.60s/it]
974
  38%|███▊ | 470/1250 [12:14<21:07, 1.62s/it]
975
  38%|███▊ | 471/1250 [12:16<20:47, 1.60s/it]
976
  38%|███▊ | 472/1250 [12:17<19:43, 1.52s/it]
977
  38%|███▊ | 473/1250 [12:18<18:18, 1.41s/it]
978
  38%|███▊ | 474/1250 [12:19<16:37, 1.29s/it]
979
  38%|███▊ | 475/1250 [12:21<18:49, 1.46s/it]
980
  38%|███▊ | 476/1250 [12:24<24:27, 1.90s/it]
981
  38%|███▊ | 477/1250 [12:26<24:00, 1.86s/it]
982
  38%|███▊ | 478/1250 [12:27<22:39, 1.76s/it]
983
  38%|███▊ | 479/1250 [12:29<21:01, 1.64s/it]
984
  38%|███▊ | 480/1250 [12:30<18:58, 1.48s/it]
985
  38%|███▊ | 481/1250 [12:31<16:44, 1.31s/it]
986
  39%|███▊ | 482/1250 [12:33<20:30, 1.60s/it]
987
  39%|███▊ | 483/1250 [12:35<20:39, 1.62s/it]
988
  39%|███▊ | 484/1250 [12:36<20:30, 1.61s/it]
989
  39%|███▉ | 485/1250 [12:38<19:36, 1.54s/it]
990
  39%|███▉ | 486/1250 [12:39<18:15, 1.43s/it]
991
  39%|███▉ | 487/1250 [12:40<16:37, 1.31s/it]
992
  39%|███▉ | 488/1250 [12:42<20:55, 1.65s/it]
993
  39%|███▉ | 489/1250 [12:44<21:17, 1.68s/it]
994
  39%|███▉ | 490/1250 [12:45<20:34, 1.62s/it]
995
  39%|███▉ | 491/1250 [12:47<19:28, 1.54s/it]
996
  39%|███▉ | 492/1250 [12:48<18:28, 1.46s/it]
997
  39%|███▉ | 493/1250 [12:49<16:39, 1.32s/it]
998
  40%|███▉ | 494/1250 [12:51<20:23, 1.62s/it]
999
  40%|███▉ | 495/1250 [12:53<20:39, 1.64s/it]
1000
  40%|███▉ | 496/1250 [12:55<20:11, 1.61s/it]
1001
  40%|███▉ | 497/1250 [12:56<19:09, 1.53s/it]
1002
  40%|███▉ | 498/1250 [12:57<17:44, 1.42s/it]
1003
  40%|███▉ | 499/1250 [12:58<15:57, 1.28s/it]
1004
  40%|████ | 500/1250 [13:00<17:17, 1.38s/it]
1005
 
1006
  40%|████ | 500/1250 [13:00<17:17, 1.38s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.
1007
+ ***** Running Evaluation *****
1008
+ Num examples = 341
1009
+ Batch size = 8
1010
+ {'loss': 20.0794, 'learning_rate': 3.675e-06, 'epoch': 3.98}
1011
+ {'loss': 10.5776, 'learning_rate': 7.425e-06, 'epoch': 7.98}
1012
+ {'loss': 6.6033, 'learning_rate': 1.1174999999999999e-05, 'epoch': 11.98}
1013
+ {'loss': 5.3857, 'learning_rate': 1.4925e-05, 'epoch': 15.98}
1014
+ {'loss': 4.4431, 'learning_rate': 1.8675e-05, 'epoch': 19.98}
1015
+
1016
+
1017
  0%| | 0/43 [00:00<?, ?it/s]
1018
+
1019
  5%|▍ | 2/43 [00:00<00:04, 9.99it/s]
1020
+
1021
  7%|▋ | 3/43 [00:00<00:07, 5.48it/s]
1022
+
1023
  9%|▉ | 4/43 [00:00<00:09, 4.26it/s]
1024
+
1025
  12%|█▏ | 5/43 [00:01<00:09, 4.09it/s]
1026
+
1027
  14%|█▍ | 6/43 [00:01<00:09, 3.90it/s]
1028
+
1029
  16%|█▋ | 7/43 [00:01<00:09, 3.85it/s]
1030
+
1031
  19%|█▊ | 8/43 [00:01<00:09, 3.77it/s]
1032
+
1033
  21%|██ | 9/43 [00:02<00:09, 3.47it/s]
1034
+
1035
  23%|██▎ | 10/43 [00:02<00:08, 3.74it/s]
1036
+
1037
  26%|██▌ | 11/43 [00:02<00:09, 3.54it/s]
1038
+
1039
  28%|██▊ | 12/43 [00:03<00:08, 3.77it/s]
1040
+
1041
  30%|███ | 13/43 [00:03<00:07, 3.86it/s]
1042
+
1043
  33%|███▎ | 14/43 [00:03<00:08, 3.48it/s]
1044
+
1045
  35%|███▍ | 15/43 [00:04<00:09, 2.99it/s]
1046
+
1047
  37%|███▋ | 16/43 [00:04<00:08, 3.05it/s]
1048
+
1049
  40%|███▉ | 17/43 [00:04<00:08, 3.14it/s]
1050
+
1051
  42%|████▏ | 18/43 [00:05<00:08, 3.05it/s]
1052
+
1053
  44%|████▍ | 19/43 [00:05<00:08, 2.85it/s]
1054
+
1055
  47%|████▋ | 20/43 [00:05<00:09, 2.54it/s]
1056
+
1057
  49%|████▉ | 21/43 [00:06<00:08, 2.66it/s]
1058
+
1059
  51%|█████ | 22/43 [00:06<00:07, 2.70it/s]
1060
+
1061
  53%|█████▎ | 23/43 [00:06<00:07, 2.82it/s]
1062
+
1063
  56%|█████▌ | 24/43 [00:07<00:06, 3.09it/s]
1064
+
1065
  58%|█████▊ | 25/43 [00:07<00:05, 3.22it/s]
1066
+
1067
  60%|██████ | 26/43 [00:07<00:05, 3.21it/s]
1068
+
1069
  63%|██████▎ | 27/43 [00:07<00:04, 3.59it/s]
1070
+
1071
  65%|██████▌ | 28/43 [00:08<00:04, 3.65it/s]
1072
+
1073
  67%|██████▋ | 29/43 [00:08<00:03, 3.76it/s]
1074
+
1075
  70%|██████▉ | 30/43 [00:08<00:03, 3.83it/s]
1076
+
1077
  72%|███████▏ | 31/43 [00:08<00:03, 3.79it/s]
1078
+
1079
  74%|███████▍ | 32/43 [00:09<00:02, 3.85it/s]
1080
+
1081
  77%|███████▋ | 33/43 [00:09<00:02, 3.54it/s]
1082
+
1083
  79%|███████▉ | 34/43 [00:09<00:02, 3.51it/s]
1084
+
1085
  81%|████████▏ | 35/43 [00:10<00:02, 3.51it/s]
1086
+
1087
  84%|████████▎ | 36/43 [00:10<00:02, 3.42it/s]
1088
+
1089
  86%|████████▌ | 37/43 [00:10<00:01, 3.56it/s]
1090
+
1091
  88%|████████▊ | 38/43 [00:11<00:01, 3.32it/s]
1092
+
1093
  91%|█████████ | 39/43 [00:11<00:01, 3.38it/s]
1094
+
1095
  93%|█████████▎| 40/43 [00:11<00:00, 3.27it/s]
1096
+
1097
  95%|█████████▌| 41/43 [00:11<00:00, 3.37it/s]
1098
+
1099
  98%|█████████▊| 42/43 [00:12<00:00, 3.19it/s]
1100
+
1101
 
1102
+
1103
 
1104
  40%|████ | 500/1250 [13:13<17:17, 1.38s/it]
1105
+
1106
+
1107
  Saving model checkpoint to ./checkpoint-500
1108
+ Configuration saved in ./checkpoint-500/config.json
1109
+ Model weights saved in ./checkpoint-500/pytorch_model.bin
1110
+ Configuration saved in ./checkpoint-500/preprocessor_config.json
1111
+ Configuration saved in ./preprocessor_config.json
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b3c7996c48bbce91a11a7b8efbb9dbd48a52cc46b992569ecdef9e54e1180da
3
+ size 1262173745
run.sh ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Launch CTC fine-tuning of facebook/wav2vec2-xls-r-300m on the Urdu ("ur")
# configuration of Common Voice 8.0 and push the results to the Hub.
#
# Every character handed to --chars_to_ignore is single-quoted so the shell
# passes it through literally. Left unquoted, `?` is a glob pattern and would be
# replaced by any single-character file name found in the working directory.
python run_speech_recognition_ctc.py \
    --dataset_name="mozilla-foundation/common_voice_8_0" \
    --model_name_or_path="facebook/wav2vec2-xls-r-300m" \
    --dataset_config_name="ur" \
    --output_dir="./" \
    --overwrite_output_dir \
    --num_train_epochs="50" \
    --per_device_train_batch_size="8" \
    --per_device_eval_batch_size="8" \
    --gradient_accumulation_steps="4" \
    --learning_rate="7.5e-5" \
    --warmup_steps="2000" \
    --length_column_name="input_length" \
    --evaluation_strategy="steps" \
    --text_column_name="sentence" \
    --chars_to_ignore ',' '?' '.' '!' '-' ';' ':' '"' '“' '%' '‘' '”' '�' '—' '’' '…' '–' \
    --save_steps="500" \
    --eval_steps="500" \
    --logging_steps="100" \
    --layerdrop="0.0" \
    --activation_dropout="0.1" \
    --save_total_limit="3" \
    --freeze_feature_encoder \
    --feat_proj_dropout="0.0" \
    --mask_time_prob="0.75" \
    --mask_time_length="10" \
    --mask_feature_prob="0.25" \
    --mask_feature_length="64" \
    --gradient_checkpointing \
    --use_auth_token \
    --fp16 \
    --group_by_length \
    --do_train=1 \
    --do_eval=1 \
    --push_to_hub
run_speech_recognition_ctc.py ADDED
@@ -0,0 +1,737 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+ # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+
16
+ """ Fine-tuning a 🤗 Transformers CTC model for automatic speech recognition"""
17
+
18
+ import functools
19
+ import json
20
+ import logging
21
+ import os
22
+ import re
23
+ import sys
24
+ import warnings
25
+ from dataclasses import dataclass, field
26
+ from typing import Dict, List, Optional, Union
27
+
28
+ import datasets
29
+ import numpy as np
30
+ import torch
31
+ from datasets import DatasetDict, load_dataset, load_metric
32
+
33
+ import transformers
34
+ from transformers import (
35
+ AutoConfig,
36
+ AutoFeatureExtractor,
37
+ AutoModelForCTC,
38
+ AutoProcessor,
39
+ AutoTokenizer,
40
+ HfArgumentParser,
41
+ Trainer,
42
+ TrainingArguments,
43
+ Wav2Vec2Processor,
44
+ set_seed,
45
+ )
46
+ from transformers.trainer_utils import get_last_checkpoint, is_main_process
47
+ from transformers.utils import check_min_version
48
+ from transformers.utils.versions import require_version
49
+
50
+
51
+ # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
52
+ check_min_version("4.17.0.dev0")
53
+
54
+ require_version("datasets>=1.13.3", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")
55
+
56
+
57
+ logger = logging.getLogger(__name__)
58
+
59
+
60
def list_field(default=None, metadata=None):
    """Build a dataclass field whose default value is a list (or ``None``).

    The value is supplied through ``default_factory`` rather than ``default``
    because dataclasses reject a list passed directly as ``default``.

    Args:
        default: Default value of the field. When it is a list, every call of
            the factory returns a fresh shallow copy, so instances never share
            one list object and cannot mutate the declared default.
        metadata: Optional metadata mapping forwarded unchanged to
            ``dataclasses.field`` (e.g. ``{"help": ...}``).

    Returns:
        The field object produced by ``dataclasses.field``.
    """

    def _make_default():
        # Copy list defaults: handing out the very same list to every instance
        # would let one instance's in-place edits leak into all the others.
        return list(default) if isinstance(default, list) else default

    return field(default_factory=_make_default, metadata=metadata)
62
+
63
+
64
@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.

    Only ``model_name_or_path`` is required; every other field has a default.
    The ``help`` entry in each field's metadata is the text shown for the
    corresponding command-line option.
    """

    model_name_or_path: str = field(
        metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
    )
    tokenizer_name_or_path: Optional[str] = field(
        default=None,
        metadata={"help": "Path to pretrained tokenizer or tokenizer identifier from huggingface.co/models"},
    )
    cache_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
    )
    freeze_feature_encoder: bool = field(
        default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."}
    )
    attention_dropout: float = field(
        default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."}
    )
    activation_dropout: float = field(
        default=0.0, metadata={"help": "The dropout ratio for activations inside the fully connected layer."}
    )
    feat_proj_dropout: float = field(default=0.0, metadata={"help": "The dropout ratio for the projected features."})
    hidden_dropout: float = field(
        default=0.0,
        metadata={
            "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler."
        },
    )
    final_dropout: float = field(
        default=0.0,
        metadata={"help": "The dropout probability for the final projection layer."},
    )
    # NOTE: the help texts below are assembled from adjacent string literals, so
    # each fragment must end with a space or the words run together.
    mask_time_prob: float = field(
        default=0.05,
        metadata={
            "help": "Probability of each feature vector along the time axis to be chosen as the start of the vector "
            "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature "
            "vectors will be masked along the time axis."
        },
    )
    mask_time_length: int = field(
        default=10,
        metadata={"help": "Length of vector span to mask along the time axis."},
    )
    mask_feature_prob: float = field(
        default=0.0,
        metadata={
            "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector "
            "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature "
            "bins will be masked along the feature axis."
        },
    )
    mask_feature_length: int = field(
        default=10,
        metadata={"help": "Length of vector span to mask along the feature axis."},
    )
    layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."})
    ctc_loss_reduction: Optional[str] = field(
        default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."}
    )
128
+
129
+
130
@dataclass
class DataTrainingArguments:
    """
    Arguments pertaining to what data we are going to input our model for training and eval.

    Using `HfArgumentParser` we can turn this class
    into argparse arguments to be able to specify them on
    the command line.

    Only ``dataset_name`` is required; every other field has a default.
    """

    dataset_name: str = field(
        metadata={"help": "The name of the dataset to use (via the datasets library)."}
    )
    # Annotated Optional because the default is None (no explicit configuration).
    dataset_config_name: Optional[str] = field(
        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
    )
    train_split_name: str = field(
        default="train+validation",
        metadata={
            "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train+validation'"
        },
    )
    eval_split_name: str = field(
        default="test",
        metadata={
            "help": "The name of the evaluation data set split to use (via the datasets library). Defaults to 'test'"
        },
    )
    audio_column_name: str = field(
        default="audio",
        metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"},
    )
    text_column_name: str = field(
        default="text",
        metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"},
    )
    overwrite_cache: bool = field(
        default=False, metadata={"help": "Overwrite the cached preprocessed datasets or not."}
    )
    preprocessing_num_workers: Optional[int] = field(
        default=None,
        metadata={"help": "The number of processes to use for the preprocessing."},
    )
    max_train_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
            "value if set."
        },
    )
    max_eval_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of validation examples to this "
            "value if set."
        },
    )
    chars_to_ignore: Optional[List[str]] = list_field(
        default=None,
        metadata={"help": "A list of characters to remove from the transcripts."},
    )
    eval_metrics: List[str] = list_field(
        default=["wer"],
        metadata={"help": "A list of metrics the model should be evaluated on. E.g. `'wer cer'`"},
    )
    max_duration_in_seconds: float = field(
        default=20.0,
        metadata={"help": "Filter audio files that are longer than `max_duration_in_seconds` seconds"},
    )
    min_duration_in_seconds: float = field(
        default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"}
    )
    preprocessing_only: bool = field(
        default=False,
        metadata={
            "help": "Whether to only do data preprocessing and skip training. "
            "This is especially useful when data preprocessing errors out in distributed training due to timeout. "
            "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` "
            "so that the cached datasets can consequently be loaded in distributed training"
        },
    )
    use_auth_token: bool = field(
        default=False,
        metadata={
            "help": "If :obj:`True`, will use the token generated when running "
            ":obj:`transformers-cli login` as HTTP bearer authorization for remote files."
        },
    )
    unk_token: str = field(
        default="[UNK]",
        metadata={"help": "The unk token for the tokenizer"},
    )
    pad_token: str = field(
        default="[PAD]",
        metadata={"help": "The padding token for the tokenizer"},
    )
    word_delimiter_token: str = field(
        default="|",
        metadata={"help": "The word delimiter token for the tokenizer"},
    )
    phoneme_language: Optional[str] = field(
        default=None,
        metadata={
            "help": "The target language that should be"
            " passed to the tokenizer for tokenization. Note that"
            " this is only relevant if the model classifies the"
            " input audio to a sequence of phoneme sequences."
        },
    )
241
+
242
+
243
@dataclass
class DataCollatorCTCWithPadding:
    """
    Collate a list of examples into one dynamically padded batch.

    Each example must provide ``"input_values"`` and ``"labels"``. The two are
    padded by separate calls to ``processor.pad`` (the labels inside
    ``processor.as_target_processor()``), and padded label positions are then
    overwritten with ``-100``.

    Args:
        processor (:class:`~transformers.AutoProcessor`)
            The processor whose ``pad`` method is used for both paddings.
        padding (:obj:`bool` or :obj:`str`, `optional`, defaults to :obj:`"longest"`):
            Padding strategy forwarded to ``processor.pad`` for inputs and labels alike, e.g.
            :obj:`True` / :obj:`'longest'`, :obj:`'max_length'` or :obj:`False` / :obj:`'do_not_pad'`.
        pad_to_multiple_of (:obj:`int`, `optional`):
            Forwarded to ``processor.pad`` when padding the ``input_values``.
        pad_to_multiple_of_labels (:obj:`int`, `optional`):
            Forwarded to ``processor.pad`` when padding the labels.
    """

    processor: AutoProcessor
    padding: Union[bool, str] = "longest"
    pad_to_multiple_of: Optional[int] = None
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
        # Inputs and labels have different lengths and are padded by different
        # routines, so they are collected into two separate lists first.
        audio_inputs = []
        label_inputs = []
        for example in features:
            audio_inputs.append({"input_values": example["input_values"]})
            label_inputs.append({"input_ids": example["labels"]})

        batch = self.processor.pad(
            audio_inputs,
            padding=self.padding,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )

        with self.processor.as_target_processor():
            padded_labels = self.processor.pad(
                label_inputs,
                padding=self.padding,
                pad_to_multiple_of=self.pad_to_multiple_of_labels,
                return_tensors="pt",
            )

        # Positions whose attention mask is not 1 are padding; they are set to
        # -100 so that they are ignored when the loss is computed.
        is_padding = padded_labels.attention_mask.ne(1)
        batch["labels"] = padded_labels["input_ids"].masked_fill(is_padding, -100)

        return batch
301
+
302
+
303
def create_vocabulary_from_data(
    datasets: DatasetDict,
    word_delimiter_token: Optional[str] = None,
    unk_token: Optional[str] = None,
    pad_token: Optional[str] = None,
):
    """Build a character-level vocabulary from the ``target_text`` column of every split.

    Args:
        datasets: Mapping of split name to dataset. It must contain a ``"train"``
            split (its column names are the ones removed by the ``map`` call) and
            every split must have a ``"target_text"`` column.
        word_delimiter_token: If given, replaces the space character in the
            vocabulary and takes over its index. When the data contains no
            space, the token is appended with the next free index instead.
        unk_token: If given, appended to the vocabulary with the next free index.
        pad_token: If given, appended to the vocabulary after ``unk_token``.

    Returns:
        A dict mapping each token to its integer index. Characters are indexed
        in sorted order.
    """

    # Given training and test labels create vocabulary
    def extract_all_chars(batch):
        all_text = " ".join(batch["target_text"])
        vocab = list(set(all_text))
        return {"vocab": [vocab], "all_text": [all_text]}

    # batch_size=-1 is intended to hand each split to `extract_all_chars` as a
    # single batch, so each mapped split holds one row with its character list.
    vocabs = datasets.map(
        extract_all_chars,
        batched=True,
        batch_size=-1,
        keep_in_memory=True,
        remove_columns=datasets["train"].column_names,
    )

    # Take the union of all unique characters in each dataset. An explicit
    # accumulator works for any number of splits; a two-argument reduce over the
    # mapped splits only works when there are exactly two of them.
    vocab_set = set()
    for split_vocab in vocabs.values():
        vocab_set.update(split_vocab["vocab"][0])

    vocab_dict = {char: index for index, char in enumerate(sorted(vocab_set))}

    # replace white space with delimiter token
    if word_delimiter_token is not None:
        if " " in vocab_dict:
            vocab_dict[word_delimiter_token] = vocab_dict.pop(" ")
        elif word_delimiter_token not in vocab_dict:
            # No space in the data (e.g. a single one-word transcript): still
            # register the delimiter instead of failing with a KeyError.
            vocab_dict[word_delimiter_token] = len(vocab_dict)

    # add unk and pad token
    if unk_token is not None:
        vocab_dict[unk_token] = len(vocab_dict)

    if pad_token is not None:
        vocab_dict[pad_token] = len(vocab_dict)

    return vocab_dict
343
+
344
+
345
def main():
    """Run the CTC fine-tuning pipeline end to end.

    The steps are: parse arguments, detect a resumable checkpoint, load and
    clean the datasets, build (or load) the tokenizer vocabulary, instantiate
    the feature extractor / tokenizer / model, preprocess the audio, then
    train and/or evaluate and finally write the model card (or push to hub).

    Returns:
        The ``results`` dict, or ``None`` when ``data_args.preprocessing_only``
        is set and the function returns right after preprocessing.

    Raises:
        ValueError: If the output directory exists, is non-empty and holds no
            checkpoint (without ``--overwrite_output_dir``), or if the
            configured audio/text column is missing from the training split.
    """
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Detecting last checkpoint.
    last_checkpoint = None
    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome."
            )
        elif last_checkpoint is not None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)

    # Log on each process the small summary:
    # (the two f-strings are concatenated, so the first one must end with a separator)
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    # Set the verbosity to info of the Transformers logger (on main process only):
    if is_main_process(training_args.local_rank):
        transformers.utils.logging.set_verbosity_info()
    logger.info("Training/evaluation parameters %s", training_args)

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # 1. First, let's load the dataset
    raw_datasets = DatasetDict()

    if training_args.do_train:
        raw_datasets["train"] = load_dataset(
            data_args.dataset_name,
            data_args.dataset_config_name,
            split=data_args.train_split_name,
            use_auth_token=data_args.use_auth_token,
        )

        if data_args.audio_column_name not in raw_datasets["train"].column_names:
            raise ValueError(
                f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
                "Make sure to set `--audio_column_name` to the correct audio column - one of "
                f"{', '.join(raw_datasets['train'].column_names)}."
            )

        if data_args.text_column_name not in raw_datasets["train"].column_names:
            raise ValueError(
                f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
                "Make sure to set `--text_column_name` to the correct text column - one of "
                f"{', '.join(raw_datasets['train'].column_names)}."
            )

        if data_args.max_train_samples is not None:
            raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples))

    if training_args.do_eval:
        raw_datasets["eval"] = load_dataset(
            data_args.dataset_name,
            data_args.dataset_config_name,
            split=data_args.eval_split_name,
            use_auth_token=data_args.use_auth_token,
        )

        if data_args.max_eval_samples is not None:
            raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples))

    # 2. We remove some special characters from the datasets
    # that make training complicated and do not help in transcribing the speech
    # E.g. characters, such as `,` and `.` do not really have an acoustic characteristic
    # that could be easily picked up by the model
    chars_to_ignore_regex = (
        f'[{"".join(data_args.chars_to_ignore)}]' if data_args.chars_to_ignore is not None else None
    )
    text_column_name = data_args.text_column_name

    def remove_special_characters(batch):
        # Lower-case the transcript and append a trailing space; the result is
        # stored under "target_text".
        if chars_to_ignore_regex is not None:
            batch["target_text"] = re.sub(chars_to_ignore_regex, "", batch[text_column_name]).lower() + " "
        else:
            batch["target_text"] = batch[text_column_name].lower() + " "
        return batch

    with training_args.main_process_first(desc="dataset map special characters removal"):
        raw_datasets = raw_datasets.map(
            remove_special_characters,
            remove_columns=[text_column_name],
            desc="remove special characters from datasets",
        )

    # save special tokens for tokenizer
    word_delimiter_token = data_args.word_delimiter_token
    unk_token = data_args.unk_token
    pad_token = data_args.pad_token

    # 3. Next, let's load the config as we might need it to create
    # the tokenizer
    # load config
    config = AutoConfig.from_pretrained(
        model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token
    )

    # 4. Next, if no tokenizer file is defined,
    # we create the vocabulary of the model by extracting all unique characters from
    # the training and evaluation datasets
    # We need to make sure that only first rank saves vocabulary
    # make sure all processes wait until vocab is created
    tokenizer_name_or_path = model_args.tokenizer_name_or_path
    tokenizer_kwargs = {}
    if tokenizer_name_or_path is None:
        # save vocab in training output dir
        tokenizer_name_or_path = training_args.output_dir

        vocab_file = os.path.join(tokenizer_name_or_path, "vocab.json")

        with training_args.main_process_first():
            if training_args.overwrite_output_dir and os.path.isfile(vocab_file):
                os.remove(vocab_file)

        with training_args.main_process_first(desc="dataset map vocabulary creation"):
            if not os.path.isfile(vocab_file):
                os.makedirs(tokenizer_name_or_path, exist_ok=True)
                vocab_dict = create_vocabulary_from_data(
                    raw_datasets,
                    word_delimiter_token=word_delimiter_token,
                    unk_token=unk_token,
                    pad_token=pad_token,
                )

                # save vocab dict to be loaded into tokenizer
                # (explicit encoding so the file does not depend on the platform default)
                with open(vocab_file, "w", encoding="utf-8") as file:
                    json.dump(vocab_dict, file)

        # if tokenizer has just been created
        # it is defined by `tokenizer_class` if present in config else by `model_type`
        tokenizer_kwargs = {
            "config": config if config.tokenizer_class is not None else None,
            "tokenizer_type": config.model_type if config.tokenizer_class is None else None,
            "unk_token": unk_token,
            "pad_token": pad_token,
            "word_delimiter_token": word_delimiter_token,
        }

    # 5. Now we can instantiate the feature extractor, tokenizer and model
    # Note for distributed training, the .from_pretrained methods guarantee that only
    # one local process can concurrently download model & vocab.

    # load feature_extractor and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        tokenizer_name_or_path,
        use_auth_token=data_args.use_auth_token,
        **tokenizer_kwargs,
    )
    feature_extractor = AutoFeatureExtractor.from_pretrained(
        model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token
    )

    # adapt config
    config.update(
        {
            "feat_proj_dropout": model_args.feat_proj_dropout,
            "attention_dropout": model_args.attention_dropout,
            "hidden_dropout": model_args.hidden_dropout,
            "final_dropout": model_args.final_dropout,
            "mask_time_prob": model_args.mask_time_prob,
            "mask_time_length": model_args.mask_time_length,
            "mask_feature_prob": model_args.mask_feature_prob,
            "mask_feature_length": model_args.mask_feature_length,
            "gradient_checkpointing": training_args.gradient_checkpointing,
            "layerdrop": model_args.layerdrop,
            "ctc_loss_reduction": model_args.ctc_loss_reduction,
            "pad_token_id": tokenizer.pad_token_id,
            "vocab_size": len(tokenizer),
            "activation_dropout": model_args.activation_dropout,
        }
    )

    # create model
    model = AutoModelForCTC.from_pretrained(
        model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        config=config,
        use_auth_token=data_args.use_auth_token,
    )

    # freeze encoder
    if model_args.freeze_feature_encoder:
        model.freeze_feature_encoder()

    # 6. Now we preprocess the datasets including loading the audio, resampling and normalization
    # Thankfully, `datasets` takes care of automatically loading and resampling the audio,
    # so that we just need to set the correct target sampling rate and normalize the input
    # via the `feature_extractor`

    # make sure that dataset decodes audio with correct sampling rate
    dataset_sampling_rate = next(iter(raw_datasets.values())).features[data_args.audio_column_name].sampling_rate
    if dataset_sampling_rate != feature_extractor.sampling_rate:
        raw_datasets = raw_datasets.cast_column(
            data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate)
        )

    # derive max & min input length for sample rate & max duration
    max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate
    min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate
    audio_column_name = data_args.audio_column_name
    num_workers = data_args.preprocessing_num_workers

    # `phoneme_language` is only relevant if the model is fine-tuned on phoneme classification
    phoneme_language = data_args.phoneme_language

    # Preprocessing the datasets.
    # We need to read the audio files as arrays and tokenize the targets.
    def prepare_dataset(batch):
        # load audio
        sample = batch[audio_column_name]

        inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"])
        batch["input_values"] = inputs.input_values[0]
        batch["input_length"] = len(batch["input_values"])

        # encode targets
        additional_kwargs = {}
        if phoneme_language is not None:
            additional_kwargs["phonemizer_lang"] = phoneme_language

        batch["labels"] = tokenizer(batch["target_text"], **additional_kwargs).input_ids
        return batch

    with training_args.main_process_first(desc="dataset map preprocessing"):
        vectorized_datasets = raw_datasets.map(
            prepare_dataset,
            remove_columns=next(iter(raw_datasets.values())).column_names,
            num_proc=num_workers,
            desc="preprocess datasets",
        )

        def is_audio_in_length_range(length):
            # Both bounds are exclusive.
            return min_input_length < length < max_input_length

        # keep only samples whose length lies strictly between the min and max input length
        vectorized_datasets = vectorized_datasets.filter(
            is_audio_in_length_range,
            num_proc=num_workers,
            input_columns=["input_length"],
        )

    # 7. Next, we can prepare the training.
    # Let's use word error rate (WER) as our evaluation metric,
    # instantiate a data collator and the trainer

    # Define evaluation metrics during training, *i.e.* word error rate, character error rate
    eval_metrics = {metric: load_metric(metric) for metric in data_args.eval_metrics}

    # for large datasets it is advised to run the preprocessing on a
    # single machine first with ``args.preprocessing_only`` since there will most likely
    # be a timeout when running the script in distributed mode.
    # In a second step ``args.preprocessing_only`` can then be set to `False` to load the
    # cached dataset
    if data_args.preprocessing_only:
        logger.info(f"Data preprocessing finished. Files cached at {vectorized_datasets.cache_files}")
        return

    def compute_metrics(pred):
        pred_logits = pred.predictions
        pred_ids = np.argmax(pred_logits, axis=-1)

        # -100 marks ignored label positions; map them back to the pad token before decoding
        pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id

        pred_str = tokenizer.batch_decode(pred_ids)
        # we do not want to group tokens when computing the metrics
        label_str = tokenizer.batch_decode(pred.label_ids, group_tokens=False)

        metrics = {k: v.compute(predictions=pred_str, references=label_str) for k, v in eval_metrics.items()}

        return metrics

    # Now save everything to be able to create a single processor later
    if is_main_process(training_args.local_rank):
        # save feature extractor, tokenizer and config
        feature_extractor.save_pretrained(training_args.output_dir)
        tokenizer.save_pretrained(training_args.output_dir)
        config.save_pretrained(training_args.output_dir)

    try:
        processor = AutoProcessor.from_pretrained(training_args.output_dir)
    except (OSError, KeyError):
        warnings.warn(
            "Loading a processor from a feature extractor config that does not"
            " include a `processor_class` attribute is deprecated and will be removed in v5. Please add the following "
            " attribute to your `preprocessor_config.json` file to suppress this warning: "
            " `'processor_class': 'Wav2Vec2Processor'`",
            FutureWarning,
        )
        processor = Wav2Vec2Processor.from_pretrained(training_args.output_dir)

    # Instantiate custom data collator
    data_collator = DataCollatorCTCWithPadding(processor=processor)

    # Initialize Trainer
    trainer = Trainer(
        model=model,
        data_collator=data_collator,
        args=training_args,
        compute_metrics=compute_metrics,
        train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
        eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
        tokenizer=feature_extractor,
    )

    # 8. Finally, we can start training

    # Training
    if training_args.do_train:

        # use last checkpoint if exist
        if last_checkpoint is not None:
            checkpoint = last_checkpoint
        elif os.path.isdir(model_args.model_name_or_path):
            checkpoint = model_args.model_name_or_path
        else:
            checkpoint = None

        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()

        metrics = train_result.metrics
        max_train_samples = (
            data_args.max_train_samples
            if data_args.max_train_samples is not None
            else len(vectorized_datasets["train"])
        )
        metrics["train_samples"] = min(max_train_samples, len(vectorized_datasets["train"]))

        trainer.log_metrics("train", metrics)
        trainer.save_metrics("train", metrics)
        trainer.save_state()

    # Evaluation
    # NOTE(review): `results` is returned below but never populated - confirm whether
    # the evaluation metrics were meant to be stored in it.
    results = {}
    if training_args.do_eval:
        logger.info("*** Evaluate ***")
        metrics = trainer.evaluate()
        max_eval_samples = (
            data_args.max_eval_samples if data_args.max_eval_samples is not None else len(vectorized_datasets["eval"])
        )
        metrics["eval_samples"] = min(max_eval_samples, len(vectorized_datasets["eval"]))

        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # Write model card and (optionally) push to hub
    config_name = data_args.dataset_config_name if data_args.dataset_config_name is not None else "na"
    kwargs = {
        "finetuned_from": model_args.model_name_or_path,
        "tasks": "speech-recognition",
        "tags": ["automatic-speech-recognition", data_args.dataset_name],
        "dataset_args": f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split: {data_args.eval_split_name}",
        "dataset": f"{data_args.dataset_name.upper()} - {config_name.upper()}",
    }
    if "common_voice" in data_args.dataset_name:
        kwargs["language"] = config_name

    if training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(**kwargs)

    return results
734
+
735
+
736
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ac9126d8e40ef2b4777d784e17705def6720cee411288fde206b31f5e389f9a
3
+ size 2991
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"،": 1, "؟": 2, "ء": 3, "آ": 4, "ؤ": 5, "ئ": 6, "ا": 7, "ب": 8, "ت": 9, "ث": 10, "ج": 11, "ح": 12, "خ": 13, "د": 14, "ذ": 15, "ر": 16, "ز": 17, "س": 18, "ش": 19, "ص": 20, "ض": 21, "ط": 22, "ظ": 23, "ع": 24, "غ": 25, "ف": 26, "ق": 27, "ل": 28, "م": 29, "ن": 30, "و": 31, "ى": 32, "ي": 33, "ً": 34, "َ": 35, "ُ": 36, "ِ": 37, "ّ": 38, "ٓ": 39, "ٔ": 40, "ٰ": 41, "ٹ": 42, "پ": 43, "چ": 44, "ڈ": 45, "ڑ": 46, "ژ": 47, "ک": 48, "گ": 49, "ں": 50, "ھ": 51, "ہ": 52, "ۂ": 53, "ی": 54, "ے": 55, "۔": 56, "|": 0, "[UNK]": 57, "[PAD]": 58}