AndrewMcDowell committed on
Commit
802dee6
1 Parent(s): 5262dde

Training in progress, step 1000

.ipynb_checkpoints/run_speech_recognition_ctc-checkpoint.py ADDED
@@ -0,0 +1,754 @@
+ #!/usr/bin/env python
+ # coding=utf-8
+ # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """ Fine-tuning a 🤗 Transformers CTC model for automatic speech recognition"""
+
+ import functools
+ import json
+ import logging
+ import os
+ import re
+ import sys
+ import warnings
+ from dataclasses import dataclass, field
+ from typing import Dict, List, Optional, Union
+
+ import datasets
+ import numpy as np
+ import torch
+ from datasets import DatasetDict, load_dataset, load_metric
+
+ import transformers
+ from transformers import (
+     AutoConfig,
+     AutoFeatureExtractor,
+     AutoModelForCTC,
+     AutoProcessor,
+     AutoTokenizer,
+     HfArgumentParser,
+     Trainer,
+     TrainingArguments,
+     Wav2Vec2Processor,
+     set_seed,
+ )
+ from transformers.trainer_utils import get_last_checkpoint, is_main_process
+ from transformers.utils import check_min_version
+ from transformers.utils.versions import require_version
+
+
+ # Will error if the minimal version of Transformers is not installed. Remove at your own risk.
+ check_min_version("4.16.0.dev0")
+
+ require_version("datasets>=1.13.3", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
+
+
+ logger = logging.getLogger(__name__)
+
+
+ def list_field(default=None, metadata=None):
+     return field(default_factory=lambda: default, metadata=metadata)
+
+
+ @dataclass
+ class ModelArguments:
+     """
+     Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
+     """
+
+     model_name_or_path: str = field(
+         metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
+     )
+     tokenizer_name_or_path: Optional[str] = field(
+         default=None,
+         metadata={"help": "Path to pretrained tokenizer or tokenizer identifier from huggingface.co/models"},
+     )
+     cache_dir: Optional[str] = field(
+         default=None,
+         metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
+     )
+     freeze_feature_encoder: bool = field(
+         default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."}
+     )
+     attention_dropout: float = field(
+         default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."}
+     )
+     activation_dropout: float = field(
+         default=0.0, metadata={"help": "The dropout ratio for activations inside the fully connected layer."}
+     )
+     feat_proj_dropout: float = field(default=0.0, metadata={"help": "The dropout ratio for the projected features."})
+     hidden_dropout: float = field(
+         default=0.0,
+         metadata={
+             "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler."
+         },
+     )
+     final_dropout: float = field(
+         default=0.0,
+         metadata={"help": "The dropout probability for the final projection layer."},
+     )
+     mask_time_prob: float = field(
+         default=0.05,
+         metadata={
+             "help": "Probability of each feature vector along the time axis to be chosen as the start of the vector "
+             "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature "
+             "vectors will be masked along the time axis."
+         },
+     )
+     mask_time_length: int = field(
+         default=10,
+         metadata={"help": "Length of vector span to mask along the time axis."},
+     )
+     mask_feature_prob: float = field(
+         default=0.0,
+         metadata={
+             "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector "
+             "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the feature axis."
+         },
+     )
+     mask_feature_length: int = field(
+         default=10,
+         metadata={"help": "Length of vector span to mask along the feature axis."},
+     )
+     layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."})
+     ctc_loss_reduction: Optional[str] = field(
+         default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."}
+     )
+
+
+ @dataclass
+ class DataTrainingArguments:
+     """
+     Arguments pertaining to what data we are going to input our model for training and eval.
+
+     Using `HfArgumentParser` we can turn this class
+     into argparse arguments to be able to specify them on
+     the command line.
+     """
+
+     dataset_name: str = field(
+         metadata={"help": "The name of the dataset to use (via the datasets library)."}
+     )
+     dataset_config_name: str = field(
+         default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
+     )
+     train_split_name: str = field(
+         default="train+validation",
+         metadata={
+             "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train+validation'"
+         },
+     )
+     eval_split_name: str = field(
+         default="test",
+         metadata={
+             "help": "The name of the evaluation data set split to use (via the datasets library). Defaults to 'test'"
+         },
+     )
+     audio_column_name: str = field(
+         default="audio",
+         metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"},
+     )
+     text_column_name: str = field(
+         default="text",
+         metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"},
+     )
+     overwrite_cache: bool = field(
+         default=False, metadata={"help": "Overwrite the cached preprocessed datasets or not."}
+     )
+     preprocessing_num_workers: Optional[int] = field(
+         default=None,
+         metadata={"help": "The number of processes to use for the preprocessing."},
+     )
+     max_train_samples: Optional[int] = field(
+         default=None,
+         metadata={
+             "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
+             "value if set."
+         },
+     )
+     max_eval_samples: Optional[int] = field(
+         default=None,
+         metadata={
+             "help": "For debugging purposes or quicker training, truncate the number of validation examples to this "
+             "value if set."
+         },
+     )
+     chars_to_ignore: Optional[List[str]] = list_field(
+         default=None,
+         metadata={"help": "A list of characters to remove from the transcripts."},
+     )
+     eval_metrics: List[str] = list_field(
+         default=["wer"],
+         metadata={"help": "A list of metrics the model should be evaluated on. E.g. `'wer cer'`"},
+     )
+     max_duration_in_seconds: float = field(
+         default=20.0,
+         metadata={
+             "help": "Filter audio files that are longer than `max_duration_in_seconds` seconds"
+         },
+     )
+     min_duration_in_seconds: float = field(
+         default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"}
+     )
+     preprocessing_only: bool = field(
+         default=False,
+         metadata={
+             "help": "Whether to only do data preprocessing and skip training. "
+             "This is especially useful when data preprocessing errors out in distributed training due to timeout. "
+             "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` "
+             "so that the cached datasets can consequently be loaded in distributed training"
+         },
+     )
+     use_auth_token: bool = field(
+         default=False,
+         metadata={
+             "help": "If :obj:`True`, will use the token generated when running "
+             ":obj:`transformers-cli login` as HTTP bearer authorization for remote files."
+         },
+     )
+     unk_token: str = field(
+         default="[UNK]",
+         metadata={"help": "The unk token for the tokenizer"},
+     )
+     pad_token: str = field(
+         default="[PAD]",
+         metadata={"help": "The padding token for the tokenizer"},
+     )
+     word_delimiter_token: str = field(
+         default="|",
+         metadata={"help": "The word delimiter token for the tokenizer"},
+     )
+     phoneme_language: Optional[str] = field(
+         default=None,
+         metadata={
+             "help": "The target language that should be passed to the tokenizer for tokenization. Note that"
+             " this is only relevant if the model classifies the"
+             " input audio to a sequence of phonemes."
+         },
+     )
+
+
+ @dataclass
+ class DataCollatorCTCWithPadding:
+     """
+     Data collator that will dynamically pad the inputs received.
+     Args:
+         processor (:class:`~transformers.AutoProcessor`)
+             The processor used for processing the data.
+         padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
+             Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
+             among:
+             * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
+               sequence is provided).
+             * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
+               maximum acceptable input length for the model if that argument is not provided.
+             * :obj:`False` or :obj:`'do_not_pad'`: No padding (i.e., can output a batch with sequences of
+               different lengths).
+         max_length (:obj:`int`, `optional`):
+             Maximum length of the ``input_values`` of the returned list and optionally padding length (see above).
+         max_length_labels (:obj:`int`, `optional`):
+             Maximum length of the ``labels`` returned list and optionally padding length (see above).
+         pad_to_multiple_of (:obj:`int`, `optional`):
+             If set will pad the sequence to a multiple of the provided value.
+             This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >=
+             7.5 (Volta).
+     """
+
+     processor: AutoProcessor
+     padding: Union[bool, str] = "longest"
+     pad_to_multiple_of: Optional[int] = None
+     pad_to_multiple_of_labels: Optional[int] = None
+
+     def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
+         # split inputs and labels since they have to be of different lengths and need
+         # different padding methods
+         input_features = [{"input_values": feature["input_values"]} for feature in features]
+         label_features = [{"input_ids": feature["labels"]} for feature in features]
+
+         batch = self.processor.pad(
+             input_features,
+             padding=self.padding,
+             pad_to_multiple_of=self.pad_to_multiple_of,
+             return_tensors="pt",
+         )
+
+         with self.processor.as_target_processor():
+             labels_batch = self.processor.pad(
+                 label_features,
+                 padding=self.padding,
+                 pad_to_multiple_of=self.pad_to_multiple_of_labels,
+                 return_tensors="pt",
+             )
+
+         # replace padding with -100 to ignore loss correctly
+         labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100)
+
+         batch["labels"] = labels
+
+         return batch
+
+
+ def create_vocabulary_from_data(
+     datasets: DatasetDict,
+     word_delimiter_token: Optional[str] = None,
+     unk_token: Optional[str] = None,
+     pad_token: Optional[str] = None,
+ ):
+     # Given training and test labels create vocabulary
+     def extract_all_chars(batch):
+         all_text = " ".join(batch["target_text"])
+         vocab = list(set(all_text))
+         return {"vocab": [vocab], "all_text": [all_text]}
+
+     vocabs = datasets.map(
+         extract_all_chars,
+         batched=True,
+         batch_size=-1,
+         keep_in_memory=True,
+         remove_columns=datasets["train"].column_names,
+         load_from_cache_file=False
+     )
+
+     # take union of all unique characters in each dataset
+     vocab_set = functools.reduce(
+         lambda vocab_1, vocab_2: set(vocab_1["vocab"][0]) | set(vocab_2["vocab"][0]), vocabs.values()
+     )
+
+     vocab_dict = {v: k for k, v in enumerate(sorted(list(vocab_set)))}
+
+     # replace white space with delimiter token
+     if word_delimiter_token is not None:
+         vocab_dict[word_delimiter_token] = vocab_dict[" "]
+         del vocab_dict[" "]
+
+     # add unk and pad token
+     if unk_token is not None:
+         vocab_dict[unk_token] = len(vocab_dict)
+
+     if pad_token is not None:
+         vocab_dict[pad_token] = len(vocab_dict)
+
+     return vocab_dict
+
+
+ def main():
+     # See all possible arguments in src/transformers/training_args.py
+     # or by passing the --help flag to this script.
+     # We now keep distinct sets of args, for a cleaner separation of concerns.
+
+     parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
+     if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
+         # If we pass only one argument to the script and it's the path to a json file,
+         # let's parse it to get our arguments.
+         model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
+     else:
+         model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+
+     # Detecting last checkpoint.
+     last_checkpoint = None
+     if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
+         last_checkpoint = get_last_checkpoint(training_args.output_dir)
+         if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
+             raise ValueError(
+                 f"Output directory ({training_args.output_dir}) already exists and is not empty. "
+                 "Use --overwrite_output_dir to overcome."
+             )
+         elif last_checkpoint is not None:
+             logger.info(
+                 f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
+                 "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
+             )
+
+     # Setup logging
+     logging.basicConfig(
+         format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+         datefmt="%m/%d/%Y %H:%M:%S",
+         handlers=[logging.StreamHandler(sys.stdout)],
+     )
+     logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
+
+     # Log on each process the small summary:
+     logger.warning(
+         f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
+         f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
+     )
+     # Set the verbosity to info of the Transformers logger (on main process only):
+     if is_main_process(training_args.local_rank):
+         transformers.utils.logging.set_verbosity_info()
+     logger.info("Training/evaluation parameters %s", training_args)
+
+     # Set seed before initializing model.
+     set_seed(training_args.seed)
+
+     # 1. First, let's load the dataset
+     raw_datasets = DatasetDict()
+
+     if training_args.do_train:
+         raw_datasets["train"] = load_dataset(
+             data_args.dataset_name,
+             data_args.dataset_config_name,
+             split=data_args.train_split_name,
+             use_auth_token=data_args.use_auth_token,
+         )
+
+         if data_args.audio_column_name not in raw_datasets["train"].column_names:
+             raise ValueError(
+                 f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
+                 "Make sure to set `--audio_column_name` to the correct audio column - one of "
+                 f"{', '.join(raw_datasets['train'].column_names)}."
+             )
+
+         if data_args.text_column_name not in raw_datasets["train"].column_names:
+             raise ValueError(
+                 f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
+                 "Make sure to set `--text_column_name` to the correct text column - one of "
+                 f"{', '.join(raw_datasets['train'].column_names)}."
+             )
+
+         if data_args.max_train_samples is not None:
+             raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples))
+
+     if training_args.do_eval:
+         raw_datasets["eval"] = load_dataset(
+             data_args.dataset_name,
+             data_args.dataset_config_name,
+             split=data_args.eval_split_name,
+             use_auth_token=data_args.use_auth_token,
+         )
+
+         if data_args.max_eval_samples is not None:
+             raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples))
+
+     # 2. We remove some special characters from the datasets
+     # that make training complicated and do not help in transcribing the speech
+     # E.g. characters, such as `,` and `.` do not really have an acoustic characteristic
+     # that could be easily picked up by the model
+     odd_chars_regex_string = '$&()*+.\/=@\[\]_`¡§«°´µ·»×àáâãåæçèéêëìíîïðñòóôõøùúûýþāăąćčďđēėęěğġħīıłńņňōŏőœřśşšťūůźżžơǐǔșțəʻʾʿ̥̆̇авеикморсфчшѹאבנעש་ནḫṟṣṭạảắằếễệọồộụứ‑‚„‟′″‹›→−≡⟨⟩カ东临乡关合城孙尣幺支比毛泽無生臣辶道镇黃'
+
+     chars_to_ignore_regex = (
+         f'[{"".join(data_args.chars_to_ignore) + odd_chars_regex_string}]' if data_args.chars_to_ignore is not None else None
+     )
+     text_column_name = data_args.text_column_name
+
+     def remove_special_characters(batch):
+         if chars_to_ignore_regex is not None:
+             batch["target_text"] = re.sub(chars_to_ignore_regex, "", batch[text_column_name]).lower() + " "
+         else:
+             batch["target_text"] = batch[text_column_name].lower() + " "
+         return batch
+
+     with training_args.main_process_first(desc="dataset map special characters removal"):
+         raw_datasets = raw_datasets.map(
+             remove_special_characters,
+             remove_columns=[text_column_name],
+             desc="remove special characters from datasets",
+             load_from_cache_file=False
+         )
+
+     # save special tokens for tokenizer
+     word_delimiter_token = data_args.word_delimiter_token
+     unk_token = data_args.unk_token
+     pad_token = data_args.pad_token
+
+     # 3. Next, let's load the config as we might need it to create
+     # the tokenizer
+     # load config
+     config = AutoConfig.from_pretrained(
+         model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token
+     )
+
+     # 4. Next, if no tokenizer file is defined,
+     # we create the vocabulary of the model by extracting all unique characters from
+     # the training and evaluation datasets
+     # We need to make sure that only first rank saves vocabulary
+     # make sure all processes wait until vocab is created
+     tokenizer_name_or_path = model_args.tokenizer_name_or_path
+     tokenizer_kwargs = {}
+     if tokenizer_name_or_path is None:
+         # save vocab in training output dir
+         tokenizer_name_or_path = training_args.output_dir
+
+         vocab_file = os.path.join(tokenizer_name_or_path, "vocab.json")
+
+         with training_args.main_process_first():
+             if training_args.overwrite_output_dir and os.path.isfile(vocab_file):
+                 os.remove(vocab_file)
+
+         with training_args.main_process_first(desc="dataset map vocabulary creation"):
+             if not os.path.isfile(vocab_file):
+                 os.makedirs(tokenizer_name_or_path, exist_ok=True)
+                 vocab_dict = create_vocabulary_from_data(
+                     raw_datasets,
+                     word_delimiter_token=word_delimiter_token,
+                     unk_token=unk_token,
+                     pad_token=pad_token,
+                 )
+
+                 # save vocab dict to be loaded into tokenizer
+                 with open(vocab_file, "w") as file:
+                     json.dump(vocab_dict, file)
+
+         # if tokenizer has just been created
+         # it is defined by `tokenizer_class` if present in config else by `model_type`
+         tokenizer_kwargs = {
+             "config": config if config.tokenizer_class is not None else None,
+             "tokenizer_type": config.model_type if config.tokenizer_class is None else None,
+             "unk_token": unk_token,
+             "pad_token": pad_token,
+             "word_delimiter_token": word_delimiter_token,
+         }
+
+     # 5. Now we can instantiate the feature extractor, tokenizer and model
+     # Note for distributed training, the .from_pretrained methods guarantee that only
+     # one local process can concurrently download model & vocab.
+
+     # load feature_extractor and tokenizer
+     tokenizer = AutoTokenizer.from_pretrained(
+         tokenizer_name_or_path,
+         use_auth_token=data_args.use_auth_token,
+         **tokenizer_kwargs,
+     )
+     feature_extractor = AutoFeatureExtractor.from_pretrained(
+         model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token
+     )
+
+     # adapt config
+     config.update(
+         {
+             "feat_proj_dropout": model_args.feat_proj_dropout,
+             "attention_dropout": model_args.attention_dropout,
+             "hidden_dropout": model_args.hidden_dropout,
+             "final_dropout": model_args.final_dropout,
+             "mask_time_prob": model_args.mask_time_prob,
+             "mask_time_length": model_args.mask_time_length,
+             "mask_feature_prob": model_args.mask_feature_prob,
+             "mask_feature_length": model_args.mask_feature_length,
+             "gradient_checkpointing": training_args.gradient_checkpointing,
+             "layerdrop": model_args.layerdrop,
+             "ctc_loss_reduction": model_args.ctc_loss_reduction,
+             "pad_token_id": tokenizer.pad_token_id,
+             "vocab_size": len(tokenizer),
+             "activation_dropout": model_args.activation_dropout,
+         }
+     )
+
+     # create model
+     model = AutoModelForCTC.from_pretrained(
+         model_args.model_name_or_path,
+         cache_dir=model_args.cache_dir,
+         config=config,
+         use_auth_token=data_args.use_auth_token,
+     )
+
+     # freeze encoder
+     if model_args.freeze_feature_encoder:
+         model.freeze_feature_encoder()
+
+     # 6. Now we preprocess the datasets including loading the audio, resampling and normalization
+     # Thankfully, `datasets` takes care of automatically loading and resampling the audio,
+     # so that we just need to set the correct target sampling rate and normalize the input
+     # via the `feature_extractor`
+
+     # make sure that dataset decodes audio with correct sampling rate
+     dataset_sampling_rate = next(iter(raw_datasets.values())).features[data_args.audio_column_name].sampling_rate
+     if dataset_sampling_rate != feature_extractor.sampling_rate:
+         raw_datasets = raw_datasets.cast_column(
+             data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate)
+         )
+
+     # derive max & min input length for sample rate & max duration
+     max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate
+     min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate
+     audio_column_name = data_args.audio_column_name
+     num_workers = data_args.preprocessing_num_workers
+
+     # `phoneme_language` is only relevant if the model is fine-tuned on phoneme classification
+     phoneme_language = data_args.phoneme_language
+
+     # Preprocessing the datasets.
+     # We need to read the audio files as arrays and tokenize the targets.
+     def prepare_dataset(batch):
+         # load audio
+         sample = batch[audio_column_name]
+
+         inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"])
+         batch["input_values"] = inputs.input_values[0]
+         batch["input_length"] = len(batch["input_values"])
+
+         # encode targets
+         additional_kwargs = {}
+         if phoneme_language is not None:
+             additional_kwargs["phonemizer_lang"] = phoneme_language
+
+         batch["labels"] = tokenizer(batch["target_text"], **additional_kwargs).input_ids
+         return batch
+
+     with training_args.main_process_first(desc="dataset map preprocessing"):
+
+         def is_text_still_present(string):
+             return len(string) > 5
+
+         # drop rows whose target text has 5 or fewer characters after cleaning.
+         raw_datasets = raw_datasets.filter(
+             is_text_still_present,
+             num_proc=num_workers,
+             input_columns=["target_text"],
+         )
+
+         vectorized_datasets = raw_datasets.map(
+             prepare_dataset,
+             remove_columns=next(iter(raw_datasets.values())).column_names,
+             num_proc=num_workers,
+             desc="preprocess datasets",
+         )
+
+         def is_audio_in_length_range(length):
+             return length > min_input_length and length < max_input_length
+
+         # filter data that is shorter than min_input_length or longer than max_input_length
+         vectorized_datasets = vectorized_datasets.filter(
+             is_audio_in_length_range,
+             num_proc=num_workers,
+             input_columns=["input_length"],
+         )
+
+     # 7. Next, we can prepare the training.
+     # Let's use word error rate (WER) as our evaluation metric,
+     # instantiate a data collator and the trainer
+
+     # Define evaluation metrics during training, *i.e.* word error rate, character error rate
+     eval_metrics = {metric: load_metric(metric) for metric in data_args.eval_metrics}
+
+     # for large datasets it is advised to run the preprocessing on a
+     # single machine first with ``args.preprocessing_only`` since there will most likely
+     # be a timeout when running the script in distributed mode.
+     # In a second step ``args.preprocessing_only`` can then be set to `False` to load the
+     # cached dataset
+     if data_args.preprocessing_only:
+         logger.info(f"Data preprocessing finished. Files cached at {vectorized_datasets.cache_files}")
+         return
+
+     def compute_metrics(pred):
+         pred_logits = pred.predictions
+         pred_ids = np.argmax(pred_logits, axis=-1)
+
+         pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id
+
+         pred_str = tokenizer.batch_decode(pred_ids)
+         # we do not want to group tokens when computing the metrics
+         label_str = tokenizer.batch_decode(pred.label_ids, group_tokens=False)
+
+         metrics = {k: v.compute(predictions=pred_str, references=label_str) for k, v in eval_metrics.items()}
+
+         return metrics
+
+     # Now save everything to be able to create a single processor later
+     if is_main_process(training_args.local_rank):
+         # save feature extractor, tokenizer and config
+         feature_extractor.save_pretrained(training_args.output_dir)
+         tokenizer.save_pretrained(training_args.output_dir)
+         config.save_pretrained(training_args.output_dir)
+
+     try:
+         processor = AutoProcessor.from_pretrained(training_args.output_dir)
+     except (OSError, KeyError):
+         warnings.warn(
+             "Loading a processor from a feature extractor config that does not"
+             " include a `processor_class` attribute is deprecated and will be removed in v5. Please add the following "
+             " attribute to your `preprocessor_config.json` file to suppress this warning: "
+             " `'processor_class': 'Wav2Vec2Processor'`",
+             FutureWarning,
+         )
+         processor = Wav2Vec2Processor.from_pretrained(training_args.output_dir)
+
+     # Instantiate custom data collator
+     data_collator = DataCollatorCTCWithPadding(processor=processor)
+
+     # Initialize Trainer
+     trainer = Trainer(
+         model=model,
+         data_collator=data_collator,
+         args=training_args,
+         compute_metrics=compute_metrics,
+         train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
+         eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
+         tokenizer=feature_extractor,
+     )
+
+     # 8. Finally, we can start training
+
+     # Training
+     if training_args.do_train:
+
+         # use last checkpoint if it exists
+         if last_checkpoint is not None:
+             checkpoint = last_checkpoint
+         elif os.path.isdir(model_args.model_name_or_path):
+             checkpoint = model_args.model_name_or_path
+         else:
+             checkpoint = None
+
+         train_result = trainer.train(resume_from_checkpoint=checkpoint)
+         trainer.save_model()
+
+         metrics = train_result.metrics
+         max_train_samples = (
+             data_args.max_train_samples
+             if data_args.max_train_samples is not None
+             else len(vectorized_datasets["train"])
+         )
+         metrics["train_samples"] = min(max_train_samples, len(vectorized_datasets["train"]))
+
+         trainer.log_metrics("train", metrics)
+         trainer.save_metrics("train", metrics)
+         trainer.save_state()
+
+     # Evaluation
+     results = {}
+     if training_args.do_eval:
+         logger.info("*** Evaluate ***")
+         metrics = trainer.evaluate()
+         max_eval_samples = (
+             data_args.max_eval_samples if data_args.max_eval_samples is not None else len(vectorized_datasets["eval"])
+         )
+         metrics["eval_samples"] = min(max_eval_samples, len(vectorized_datasets["eval"]))
+
+         trainer.log_metrics("eval", metrics)
+         trainer.save_metrics("eval", metrics)
+
+     # Write model card and (optionally) push to hub
+     config_name = data_args.dataset_config_name if data_args.dataset_config_name is not None else "na"
+     kwargs = {
+         "finetuned_from": model_args.model_name_or_path,
+         "tasks": "speech-recognition",
+         "tags": ["automatic-speech-recognition", data_args.dataset_name],
+         "dataset_args": f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split: {data_args.eval_split_name}",
+         "dataset": f"{data_args.dataset_name.upper()} - {config_name.upper()}",
+     }
+     if "common_voice" in data_args.dataset_name:
+         kwargs["language"] = config_name
+
+     if training_args.push_to_hub:
+         trainer.push_to_hub(**kwargs)
+     else:
+         trainer.create_model_card(**kwargs)
+
+     return results
+
+
+ if __name__ == "__main__":
+     main()
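
Note on the collator in the file above: the key step is `masked_fill(labels_batch.attention_mask.ne(1), -100)`, which overwrites padded label positions with -100, the index PyTorch loss functions ignore. A minimal standalone sketch of just that masking step (hypothetical tensors, not part of the commit):

import torch

# hypothetical batch: 0 is the pad id; the second sequence needs no padding
labels = torch.tensor([[5, 9, 2, 0, 0], [7, 1, 3, 4, 6]])
attention_mask = torch.tensor([[1, 1, 1, 0, 0], [1, 1, 1, 1, 1]])

# same call as in DataCollatorCTCWithPadding.__call__
masked = labels.masked_fill(attention_mask.ne(1), -100)
print(masked)
# tensor([[   5,    9,    2, -100, -100],
#         [   7,    1,    3,    4,    6]])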
.ipynb_checkpoints/run_training-checkpoint.sh CHANGED
@@ -3,7 +3,8 @@ python run_speech_recognition_ctc.py \
  --model_name_or_path="facebook/wav2vec2-xls-r-1b" \
  --dataset_config_name="de" \
  --output_dir="./" \
- --num_train_epochs="2" \
+ --overwrite_output_dir \
+ --num_train_epochs="2.5" \
  --per_device_train_batch_size="8" \
  --per_device_eval_batch_size="8" \
  --gradient_accumulation_steps="4" \
.ipynb_checkpoints/special_tokens_map-checkpoint.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
.ipynb_checkpoints/vocab-checkpoint.json ADDED
@@ -0,0 +1 @@
+ {"!": 1, "\"": 2, "$": 3, "%": 4, "&": 5, "'": 6, "(": 7, ")": 8, "*": 9, "+": 10, ",": 11, "-": 12, ".": 13, "/": 14, ":": 15, ";": 16, "=": 17, "?": 18, "@": 19, "[": 20, "]": 21, "_": 22, "`": 23, "a": 24, "b": 25, "c": 26, "d": 27, "e": 28, "f": 29, "g": 30, "h": 31, "i": 32, "j": 33, "k": 34, "l": 35, "m": 36, "n": 37, "o": 38, "p": 39, "q": 40, "r": 41, "s": 42, "t": 43, "u": 44, "v": 45, "w": 46, "x": 47, "y": 48, "z": 49, "\u00a1": 50, "\u00a7": 51, "\u00ab": 52, "\u00b0": 53, "\u00b4": 54, "\u00b5": 55, "\u00b7": 56, "\u00bb": 57, "\u00d7": 58, "\u00df": 59, "\u00e0": 60, "\u00e1": 61, "\u00e2": 62, "\u00e3": 63, "\u00e4": 64, "\u00e5": 65, "\u00e6": 66, "\u00e7": 67, "\u00e8": 68, "\u00e9": 69, "\u00ea": 70, "\u00eb": 71, "\u00ec": 72, "\u00ed": 73, "\u00ee": 74, "\u00ef": 75, "\u00f0": 76, "\u00f1": 77, "\u00f2": 78, "\u00f3": 79, "\u00f4": 80, "\u00f5": 81, "\u00f6": 82, "\u00f8": 83, "\u00f9": 84, "\u00fa": 85, "\u00fb": 86, "\u00fc": 87, "\u00fd": 88, "\u00fe": 89, "\u0101": 90, "\u0103": 91, "\u0105": 92, "\u0107": 93, "\u010d": 94, "\u010f": 95, "\u0111": 96, "\u0113": 97, "\u0117": 98, "\u0119": 99, "\u011b": 100, "\u011f": 101, "\u0121": 102, "\u0127": 103, "\u012b": 104, "\u0131": 105, "\u0142": 106, "\u0144": 107, "\u0146": 108, "\u0148": 109, "\u014d": 110, "\u014f": 111, "\u0151": 112, "\u0153": 113, "\u0159": 114, "\u015b": 115, "\u015f": 116, "\u0161": 117, "\u0165": 118, "\u016b": 119, "\u016f": 120, "\u017a": 121, "\u017c": 122, "\u017e": 123, "\u01a1": 124, "\u01d0": 125, "\u01d4": 126, "\u0219": 127, "\u021b": 128, "\u0259": 129, "\u02bb": 130, "\u02be": 131, "\u02bf": 132, "\u0306": 133, "\u0307": 134, "\u0325": 135, "\u0430": 136, "\u0432": 137, "\u0435": 138, "\u0438": 139, "\u043a": 140, "\u043c": 141, "\u043e": 142, "\u0440": 143, "\u0441": 144, "\u0444": 145, "\u0447": 146, "\u0448": 147, "\u0479": 148, "\u05d0": 149, "\u05d1": 150, "\u05e0": 151, "\u05e2": 152, "\u05e9": 153, "\u0f0b": 154, "\u0f53": 155, "\u1e2b": 156, "\u1e5f": 157, "\u1e63": 158, "\u1e6d": 159, "\u1ea1": 160, "\u1ea3": 161, "\u1eaf": 162, "\u1eb1": 163, "\u1ebf": 164, "\u1ec5": 165, "\u1ec7": 166, "\u1ecd": 167, "\u1ed3": 168, "\u1ed9": 169, "\u1ee5": 170, "\u1ee9": 171, "\u2011": 172, "\u2013": 173, "\u2014": 174, "\u2018": 175, "\u2019": 176, "\u201a": 177, "\u201c": 178, "\u201d": 179, "\u201e": 180, "\u201f": 181, "\u2026": 182, "\u2032": 183, "\u2033": 184, "\u2039": 185, "\u203a": 186, "\u2192": 187, "\u2212": 188, "\u2261": 189, "\u27e8": 190, "\u27e9": 191, "\u30ab": 192, "\u4e1c": 193, "\u4e34": 194, "\u4e61": 195, "\u5173": 196, "\u5408": 197, "\u57ce": 198, "\u5b59": 199, "\u5c23": 200, "\u5e7a": 201, "\u652f": 202, "\u6bd4": 203, "\u6bdb": 204, "\u6cfd": 205, "\u7121": 206, "\u751f": 207, "\u81e3": 208, "\u8fb6": 209, "\u9053": 210, "\u9547": 211, "\u9ec3": 212, "|": 0, "[UNK]": 213, "[PAD]": 214}
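
The vocabulary above maps each character to an id, with the word delimiter "|" at id 0 and [UNK]/[PAD] appended last, exactly as `create_vocabulary_from_data` builds it. A minimal sketch of loading such a file into a CTC tokenizer (assumes the JSON is saved locally as vocab.json):

from transformers import Wav2Vec2CTCTokenizer

tokenizer = Wav2Vec2CTCTokenizer(
    "vocab.json",  # the character-to-id mapping shown above
    unk_token="[UNK]",
    pad_token="[PAD]",
    word_delimiter_token="|",
)
# spaces are mapped to the word delimiter token "|"
print(tokenizer("hallo welt").input_ids)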
added_tokens.json CHANGED
@@ -1 +1 @@
- {"<s>": 191, "</s>": 192}
+ {"<s>": 70, "</s>": 71}
config.json CHANGED
@@ -76,7 +76,7 @@
   "num_hidden_layers": 48,
   "num_negatives": 100,
   "output_hidden_size": 1280,
-  "pad_token_id": 190,
+  "pad_token_id": 69,
   "proj_codevector_dim": 1024,
   "tdnn_dilation": [
     1,
@@ -102,6 +102,6 @@
   "torch_dtype": "float32",
   "transformers_version": "4.17.0.dev0",
   "use_weighted_layer_sum": false,
-  "vocab_size": 193,
+  "vocab_size": 72,
   "xvector_output_dim": 512
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c1ed4c2cebf9ed5304c0b72a8fc20e81595486f988de3f6e750ce22ad251c158
- size 3851301681
+ oid sha256:64a8d46236324825c647641375f6b303a64c6787dc4b05b5d6eb4f95910b8b10
+ size 3850681649
run_speech_recognition_ctc.py CHANGED
@@ -318,6 +318,7 @@ def create_vocabulary_from_data(
         batch_size=-1,
         keep_in_memory=True,
         remove_columns=datasets["train"].column_names,
+        load_from_cache_file=False
     )
 
     # take union of all unique characters in each dataset
@@ -434,8 +435,10 @@ def main():
     # that make training complicated and do not help in transcribing the speech
     # E.g. characters, such as `,` and `.` do not really have an acoustic characteristic
     # that could be easily picked up by the model
+    odd_chars_regex_string = '$&()*+.\/=@\[\]_`¡§«°´µ·»×àáâãåæçèéêëìíîïðñòóôõøùúûýþāăąćčďđēėęěğġħīıłńņňōŏőœřśşšťūůźżžơǐǔșțəʻʾʿ̥̆̇авеикморсфчшѹאבנעש་ནḫṟṣṭạảắằếễệọồộụứ‑‚„‟′″‹›→−≡⟨⟩カ东临乡关合城孙尣幺支比毛泽無生臣辶道镇黃'
+
     chars_to_ignore_regex = (
-        f'[{"".join(data_args.chars_to_ignore)}]' if data_args.chars_to_ignore is not None else None
+        f'[{"".join(data_args.chars_to_ignore) + odd_chars_regex_string}]' if data_args.chars_to_ignore is not None else None
     )
     text_column_name = data_args.text_column_name
 
@@ -451,6 +454,7 @@ def main():
             remove_special_characters,
             remove_columns=[text_column_name],
             desc="remove special characters from datasets",
+            load_from_cache_file=False
         )
 
     # save special tokens for tokenizer
@@ -592,6 +596,17 @@ def main():
         return batch
 
     with training_args.main_process_first(desc="dataset map preprocessing"):
+
+        def is_text_still_present(string):
+            return len(string) > 5
+
+        # drop rows whose target text has 5 or fewer characters after cleaning.
+        raw_datasets = raw_datasets.filter(
+            is_text_still_present,
+            num_proc=num_workers,
+            input_columns=["target_text"],
+        )
+
         vectorized_datasets = raw_datasets.map(
             prepare_dataset,
             remove_columns=next(iter(raw_datasets.values())).column_names,
@@ -608,6 +623,8 @@ def main():
             num_proc=num_workers,
             input_columns=["input_length"],
         )
+
+
 
     # 7. Next, we can prepare the training.
     # Let's use word error rate (WER) as our evaluation metric,
run_training.sh CHANGED
@@ -3,7 +3,8 @@ python run_speech_recognition_ctc.py \
  --model_name_or_path="facebook/wav2vec2-xls-r-1b" \
  --dataset_config_name="de" \
  --output_dir="./" \
- --num_train_epochs="2" \
+ --overwrite_output_dir \
+ --num_train_epochs="2.5" \
  --per_device_train_batch_size="8" \
  --per_device_eval_batch_size="8" \
  --gradient_accumulation_steps="4" \
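
On the flag change above: `num_train_epochs` is a float field on `TrainingArguments`, so "2.5" is valid and training stops halfway through the third epoch. A quick sketch of how `HfArgumentParser` turns such flags into typed values (hypothetical argument list, not part of the commit):

from transformers import HfArgumentParser, TrainingArguments

parser = HfArgumentParser(TrainingArguments)
(training_args,) = parser.parse_args_into_dataclasses(
    ["--output_dir", "./", "--overwrite_output_dir", "--num_train_epochs", "2.5"]
)
print(training_args.num_train_epochs)      # 2.5
print(training_args.overwrite_output_dir)  # True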
special_tokens_map.json CHANGED
@@ -1 +1 @@
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
trainer_state.json DELETED
@@ -1,1900 +0,0 @@
- {
-   "best_metric": null,
-   "best_model_checkpoint": null,
-   "epoch": 1.9999816584435355,
-   "global_step": 27260,
-   "is_hyper_param_search": false,
-   "is_local_process_zero": true,
-   "is_world_process_zero": true,
-   "log_history": [
-     {
-       "epoch": 0.01,
-       "learning_rate": 3.675e-06,
-       "loss": 11.4989,
-       "step": 100
-     },
-     {
-       "epoch": 0.01,
-       "learning_rate": 7.425e-06,
-       "loss": 3.2394,
-       "step": 200
-     },
-     {
-       "epoch": 0.02,
-       "learning_rate": 1.1174999999999999e-05,
-       "loss": 3.0303,
-       "step": 300
-     },
-     {
-       "epoch": 0.03,
-       "learning_rate": 1.4925e-05,
-       "loss": 2.9052,
-       "step": 400
-     },
-     {
-       "epoch": 0.04,
-       "learning_rate": 1.8675e-05,
-       "loss": 2.1033,
-       "step": 500
-     },
-     {
-       "epoch": 0.04,
-       "learning_rate": 2.2424999999999996e-05,
-       "loss": 1.674,
-       "step": 600
-     },
-     {
-       "epoch": 0.05,
-       "learning_rate": 2.6174999999999996e-05,
-       "loss": 1.5568,
-       "step": 700
-     },
-     {
-       "epoch": 0.06,
-       "learning_rate": 2.9925e-05,
-       "loss": 1.4654,
-       "step": 800
-     },
-     {
-       "epoch": 0.07,
-       "learning_rate": 3.3675e-05,
-       "loss": 1.3031,
-       "step": 900
-     },
-     {
-       "epoch": 0.07,
-       "learning_rate": 3.7424999999999995e-05,
-       "loss": 1.1842,
-       "step": 1000
-     },
-     {
-       "epoch": 0.07,
-       "eval_loss": 0.44609957933425903,
-       "eval_runtime": 1053.3237,
-       "eval_samples_per_second": 15.197,
-       "eval_steps_per_second": 1.9,
-       "eval_wer": 0.49177182344586473,
-       "step": 1000
-     },
-     {
-       "epoch": 0.08,
-       "learning_rate": 4.1175e-05,
-       "loss": 1.1329,
-       "step": 1100
-     },
-     {
-       "epoch": 0.09,
-       "learning_rate": 4.4924999999999994e-05,
-       "loss": 1.1316,
-       "step": 1200
-     },
-     {
-       "epoch": 0.1,
-       "learning_rate": 4.8675e-05,
-       "loss": 1.1092,
-       "step": 1300
-     },
-     {
-       "epoch": 0.1,
-       "learning_rate": 5.2424999999999994e-05,
-       "loss": 1.1215,
-       "step": 1400
-     },
-     {
-       "epoch": 0.11,
-       "learning_rate": 5.6175e-05,
-       "loss": 1.1165,
-       "step": 1500
-     },
-     {
-       "epoch": 0.12,
-       "learning_rate": 5.9925e-05,
-       "loss": 1.0946,
-       "step": 1600
-     },
-     {
-       "epoch": 0.12,
-       "learning_rate": 6.367499999999999e-05,
-       "loss": 1.1189,
-       "step": 1700
-     },
-     {
-       "epoch": 0.13,
-       "learning_rate": 6.7425e-05,
-       "loss": 1.1175,
-       "step": 1800
-     },
-     {
-       "epoch": 0.14,
-       "learning_rate": 7.1175e-05,
-       "loss": 1.1254,
-       "step": 1900
-     },
-     {
-       "epoch": 0.15,
-       "learning_rate": 7.492499999999999e-05,
-       "loss": 1.1317,
-       "step": 2000
-     },
-     {
-       "epoch": 0.15,
-       "eval_loss": 0.2668535113334656,
-       "eval_runtime": 988.5751,
-       "eval_samples_per_second": 16.192,
-       "eval_steps_per_second": 2.024,
-       "eval_wer": 0.2748006118212608,
-       "step": 2000
-     },
-     {
-       "epoch": 0.15,
-       "learning_rate": 7.470902612826603e-05,
-       "loss": 1.1296,
-       "step": 2100
-     },
-     {
-       "epoch": 0.16,
-       "learning_rate": 7.441211401425178e-05,
-       "loss": 1.1406,
-       "step": 2200
-     },
-     {
-       "epoch": 0.17,
-       "learning_rate": 7.411520190023751e-05,
-       "loss": 1.1362,
-       "step": 2300
-     },
-     {
-       "epoch": 0.18,
-       "learning_rate": 7.381828978622327e-05,
-       "loss": 1.1292,
-       "step": 2400
-     },
-     {
-       "epoch": 0.18,
-       "learning_rate": 7.352137767220902e-05,
-       "loss": 1.105,
-       "step": 2500
-     },
-     {
-       "epoch": 0.19,
-       "learning_rate": 7.322446555819477e-05,
-       "loss": 1.1231,
-       "step": 2600
-     },
-     {
-       "epoch": 0.2,
-       "learning_rate": 7.292755344418051e-05,
-       "loss": 1.1187,
-       "step": 2700
-     },
-     {
-       "epoch": 0.21,
-       "learning_rate": 7.263064133016626e-05,
-       "loss": 1.1339,
-       "step": 2800
-     },
-     {
-       "epoch": 0.21,
-       "learning_rate": 7.233372921615201e-05,
-       "loss": 1.1241,
-       "step": 2900
-     },
-     {
-       "epoch": 0.22,
-       "learning_rate": 7.203681710213777e-05,
-       "loss": 1.1029,
-       "step": 3000
-     },
-     {
-       "epoch": 0.22,
-       "eval_loss": 0.2638496458530426,
-       "eval_runtime": 987.5568,
-       "eval_samples_per_second": 16.209,
-       "eval_steps_per_second": 2.026,
-       "eval_wer": 0.2705875122910521,
-       "step": 3000
-     },
-     {
-       "epoch": 0.23,
-       "learning_rate": 7.173990498812351e-05,
-       "loss": 1.1215,
-       "step": 3100
-     },
-     {
-       "epoch": 0.23,
-       "learning_rate": 7.144299287410925e-05,
-       "loss": 1.1067,
-       "step": 3200
-     },
-     {
-       "epoch": 0.24,
-       "learning_rate": 7.114608076009501e-05,
-       "loss": 1.1126,
-       "step": 3300
-     },
-     {
-       "epoch": 0.25,
-       "learning_rate": 7.084916864608076e-05,
-       "loss": 1.109,
-       "step": 3400
-     },
-     {
-       "epoch": 0.26,
-       "learning_rate": 7.05522565320665e-05,
-       "loss": 1.1077,
-       "step": 3500
-     },
-     {
-       "epoch": 0.26,
-       "learning_rate": 7.025534441805225e-05,
-       "loss": 1.1,
-       "step": 3600
-     },
-     {
-       "epoch": 0.27,
-       "learning_rate": 6.9958432304038e-05,
-       "loss": 1.1061,
-       "step": 3700
-     },
-     {
-       "epoch": 0.28,
-       "learning_rate": 6.966152019002374e-05,
-       "loss": 1.103,
-       "step": 3800
-     },
-     {
-       "epoch": 0.29,
-       "learning_rate": 6.936460807600949e-05,
-       "loss": 1.0947,
-       "step": 3900
-     },
-     {
-       "epoch": 0.29,
-       "learning_rate": 6.906769596199525e-05,
-       "loss": 1.0949,
-       "step": 4000
-     },
-     {
-       "epoch": 0.29,
-       "eval_loss": 0.25188884139060974,
-       "eval_runtime": 996.0428,
-       "eval_samples_per_second": 16.071,
-       "eval_steps_per_second": 2.009,
-       "eval_wer": 0.26274172402490986,
-       "step": 4000
-     },
-     {
-       "epoch": 0.3,
-       "learning_rate": 6.8770783847981e-05,
-       "loss": 1.1076,
-       "step": 4100
-     },
-     {
-       "epoch": 0.31,
-       "learning_rate": 6.847387173396674e-05,
-       "loss": 1.1012,
-       "step": 4200
-     },
-     {
-       "epoch": 0.32,
-       "learning_rate": 6.817695961995249e-05,
-       "loss": 1.081,
-       "step": 4300
-     },
-     {
-       "epoch": 0.32,
-       "learning_rate": 6.788004750593824e-05,
-       "loss": 1.0868,
-       "step": 4400
-     },
-     {
-       "epoch": 0.33,
-       "learning_rate": 6.758313539192398e-05,
-       "loss": 1.0956,
-       "step": 4500
-     },
-     {
-       "epoch": 0.34,
-       "learning_rate": 6.728622327790973e-05,
-       "loss": 1.0953,
-       "step": 4600
-     },
-     {
-       "epoch": 0.34,
-       "learning_rate": 6.698931116389548e-05,
-       "loss": 1.0952,
-       "step": 4700
-     },
-     {
-       "epoch": 0.35,
-       "learning_rate": 6.669239904988122e-05,
-       "loss": 1.0968,
-       "step": 4800
-     },
-     {
-       "epoch": 0.36,
-       "learning_rate": 6.639548693586698e-05,
-       "loss": 1.0827,
-       "step": 4900
-     },
-     {
-       "epoch": 0.37,
-       "learning_rate": 6.609857482185273e-05,
-       "loss": 1.0923,
-       "step": 5000
-     },
-     {
-       "epoch": 0.37,
-       "eval_loss": 0.24751192331314087,
-       "eval_runtime": 984.8205,
-       "eval_samples_per_second": 16.254,
-       "eval_steps_per_second": 2.032,
-       "eval_wer": 0.25769556429585927,
-       "step": 5000
-     },
-     {
-       "epoch": 0.37,
-       "learning_rate": 6.580166270783846e-05,
-       "loss": 1.0895,
-       "step": 5100
-     },
-     {
-       "epoch": 0.38,
-       "learning_rate": 6.550771971496436e-05,
-       "loss": 1.0851,
-       "step": 5200
-     },
-     {
-       "epoch": 0.39,
-       "learning_rate": 6.521080760095011e-05,
-       "loss": 1.1124,
-       "step": 5300
-     },
-     {
-       "epoch": 0.4,
-       "learning_rate": 6.491686460807601e-05,
-       "loss": 1.0809,
-       "step": 5400
-     },
-     {
-       "epoch": 0.4,
-       "learning_rate": 6.461995249406176e-05,
-       "loss": 1.0985,
-       "step": 5500
-     },
-     {
-       "epoch": 0.41,
-       "learning_rate": 6.432304038004749e-05,
-       "loss": 1.086,
-       "step": 5600
-     },
-     {
-       "epoch": 0.42,
-       "learning_rate": 6.402612826603325e-05,
-       "loss": 1.0823,
-       "step": 5700
-     },
-     {
-       "epoch": 0.43,
-       "learning_rate": 6.3729216152019e-05,
-       "loss": 1.0732,
-       "step": 5800
-     },
-     {
-       "epoch": 0.43,
-       "learning_rate": 6.343230403800475e-05,
-       "loss": 1.076,
-       "step": 5900
-     },
-     {
-       "epoch": 0.44,
-       "learning_rate": 6.313539192399049e-05,
-       "loss": 1.0847,
-       "step": 6000
-     },
-     {
-       "epoch": 0.44,
-       "eval_loss": 0.24355509877204895,
-       "eval_runtime": 984.3756,
-       "eval_samples_per_second": 16.261,
-       "eval_steps_per_second": 2.033,
-       "eval_wer": 0.26121217087293785,
-       "step": 6000
-     },
-     {
-       "epoch": 0.45,
-       "learning_rate": 6.283847980997624e-05,
-       "loss": 1.0748,
-       "step": 6100
-     },
-     {
-       "epoch": 0.45,
-       "learning_rate": 6.254156769596199e-05,
-       "loss": 1.0836,
-       "step": 6200
-     },
-     {
-       "epoch": 0.46,
-       "learning_rate": 6.224465558194773e-05,
-       "loss": 1.084,
-       "step": 6300
-     },
-     {
-       "epoch": 0.47,
-       "learning_rate": 6.194774346793349e-05,
-       "loss": 1.0649,
-       "step": 6400
-     },
-     {
-       "epoch": 0.48,
-       "learning_rate": 6.165083135391923e-05,
-       "loss": 1.0751,
-       "step": 6500
-     },
-     {
-       "epoch": 0.48,
-       "learning_rate": 6.135391923990499e-05,
-       "loss": 1.0773,
-       "step": 6600
-     },
-     {
-       "epoch": 0.49,
-       "learning_rate": 6.105700712589073e-05,
-       "loss": 1.095,
-       "step": 6700
-     },
-     {
-       "epoch": 0.5,
-       "learning_rate": 6.076009501187648e-05,
-       "loss": 1.0629,
-       "step": 6800
-     },
-     {
-       "epoch": 0.51,
-       "learning_rate": 6.0463182897862234e-05,
-       "loss": 1.0904,
-       "step": 6900
-     },
-     {
-       "epoch": 0.51,
-       "learning_rate": 6.0166270783847974e-05,
-       "loss": 1.0667,
-       "step": 7000
-     },
-     {
-       "epoch": 0.51,
-       "eval_loss": 0.24724909663200378,
-       "eval_runtime": 983.1677,
-       "eval_samples_per_second": 16.281,
-       "eval_steps_per_second": 2.035,
-       "eval_wer": 0.26608762154484866,
-       "step": 7000
-     },
-     {
-       "epoch": 0.52,
-       "learning_rate": 5.986935866983372e-05,
-       "loss": 1.0825,
-       "step": 7100
-     },
-     {
-       "epoch": 0.53,
-       "learning_rate": 5.9572446555819474e-05,
-       "loss": 1.0811,
-       "step": 7200
-     },
-     {
-       "epoch": 0.54,
-       "learning_rate": 5.927553444180522e-05,
-       "loss": 1.0906,
-       "step": 7300
-     },
-     {
-       "epoch": 0.54,
-       "learning_rate": 5.8978622327790975e-05,
-       "loss": 1.0784,
-       "step": 7400
-     },
-     {
-       "epoch": 0.55,
-       "learning_rate": 5.8681710213776715e-05,
-       "loss": 1.0822,
-       "step": 7500
-     },
-     {
-       "epoch": 0.56,
-       "learning_rate": 5.838479809976246e-05,
-       "loss": 1.0802,
-       "step": 7600
-     },
-     {
-       "epoch": 0.56,
-       "learning_rate": 5.8087885985748215e-05,
-       "loss": 1.0805,
-       "step": 7700
-     },
-     {
-       "epoch": 0.57,
-       "learning_rate": 5.779097387173396e-05,
-       "loss": 1.093,
-       "step": 7800
-     },
-     {
-       "epoch": 0.58,
-       "learning_rate": 5.749406175771971e-05,
-       "loss": 1.0456,
-       "step": 7900
-     },
-     {
-       "epoch": 0.59,
-       "learning_rate": 5.7197149643705455e-05,
-       "loss": 1.0709,
-       "step": 8000
-     },
-     {
-       "epoch": 0.59,
-       "eval_loss": 0.24887976050376892,
-       "eval_runtime": 982.4054,
-       "eval_samples_per_second": 16.294,
-       "eval_steps_per_second": 2.037,
-       "eval_wer": 0.26095269310608543,
-       "step": 8000
-     },
-     {
-       "epoch": 0.59,
-       "learning_rate": 5.690023752969121e-05,
-       "loss": 1.0677,
-       "step": 8100
-     },
-     {
-       "epoch": 0.6,
-       "learning_rate": 5.6603325415676956e-05,
-       "loss": 1.0659,
-       "step": 8200
-     },
-     {
-       "epoch": 0.61,
-       "learning_rate": 5.630641330166271e-05,
-       "loss": 1.0788,
-       "step": 8300
-     },
-     {
-       "epoch": 0.62,
-       "learning_rate": 5.600950118764845e-05,
-       "loss": 1.071,
-       "step": 8400
-     },
-     {
-       "epoch": 0.62,
-       "learning_rate": 5.5712589073634196e-05,
-       "loss": 1.0669,
-       "step": 8500
-     },
-     {
-       "epoch": 0.63,
-       "learning_rate": 5.541567695961995e-05,
-       "loss": 1.0728,
-       "step": 8600
-     },
-     {
-       "epoch": 0.64,
-       "learning_rate": 5.512173396674584e-05,
-       "loss": 1.0652,
-       "step": 8700
-     },
-     {
-       "epoch": 0.65,
-       "learning_rate": 5.482482185273159e-05,
-       "loss": 1.0632,
-       "step": 8800
-     },
-     {
-       "epoch": 0.65,
-       "learning_rate": 5.452790973871733e-05,
-       "loss": 1.048,
-       "step": 8900
-     },
-     {
-       "epoch": 0.66,
-       "learning_rate": 5.4230997624703083e-05,
-       "loss": 1.0472,
-       "step": 9000
-     },
-     {
-       "epoch": 0.66,
-       "eval_loss": 0.23543120920658112,
-       "eval_runtime": 985.8901,
-       "eval_samples_per_second": 16.236,
-       "eval_steps_per_second": 2.03,
-       "eval_wer": 0.24997268655085764,
-       "step": 9000
-     },
-     {
-       "epoch": 0.67,
-       "learning_rate": 5.393408551068883e-05,
-       "loss": 1.0552,
-       "step": 9100
-     },
-     {
-       "epoch": 0.67,
-       "learning_rate": 5.3637173396674584e-05,
-       "loss": 1.0581,
-       "step": 9200
-     },
-     {
-       "epoch": 0.68,
-       "learning_rate": 5.3340261282660324e-05,
-       "loss": 1.0658,
-       "step": 9300
-     },
-     {
-       "epoch": 0.69,
-       "learning_rate": 5.304334916864607e-05,
-       "loss": 1.0603,
-       "step": 9400
-     },
-     {
-       "epoch": 0.7,
-       "learning_rate": 5.2746437054631824e-05,
-       "loss": 1.0661,
-       "step": 9500
-     },
-     {
-       "epoch": 0.7,
-       "learning_rate": 5.244952494061757e-05,
-       "loss": 1.0554,
-       "step": 9600
-     },
-     {
-       "epoch": 0.71,
-       "learning_rate": 5.2152612826603325e-05,
-       "loss": 1.0728,
-       "step": 9700
-     },
-     {
-       "epoch": 0.72,
-       "learning_rate": 5.1855700712589065e-05,
-       "loss": 1.0513,
-       "step": 9800
-     },
-     {
-       "epoch": 0.73,
-       "learning_rate": 5.155878859857482e-05,
-       "loss": 1.0379,
-       "step": 9900
-     },
-     {
-       "epoch": 0.73,
-       "learning_rate": 5.1261876484560565e-05,
-       "loss": 1.0604,
-       "step": 10000
-     },
-     {
-       "epoch": 0.73,
-       "eval_loss": 0.23458585143089294,
-       "eval_runtime": 986.1525,
-       "eval_samples_per_second": 16.232,
-       "eval_steps_per_second": 2.029,
-       "eval_wer": 0.2485182453840271,
-       "step": 10000
-     },
-     {
-       "epoch": 0.74,
-       "learning_rate": 5.096496437054632e-05,
-       "loss": 1.0632,
-       "step": 10100
-     },
-     {
-       "epoch": 0.75,
-       "learning_rate": 5.0668052256532065e-05,
-       "loss": 1.0526,
-       "step": 10200
-     },
-     {
-       "epoch": 0.76,
-       "learning_rate": 5.0371140142517805e-05,
-       "loss": 1.0314,
-       "step": 10300
-     },
-     {
-       "epoch": 0.76,
-       "learning_rate": 5.007422802850356e-05,
-       "loss": 1.0508,
-       "step": 10400
-     },
-     {
-       "epoch": 0.77,
-       "learning_rate": 4.9777315914489306e-05,
-       "loss": 1.0446,
-       "step": 10500
-     },
-     {
-       "epoch": 0.78,
-       "learning_rate": 4.948040380047506e-05,
-       "loss": 1.0361,
-       "step": 10600
-     },
-     {
-       "epoch": 0.79,
-       "learning_rate": 4.91834916864608e-05,
-       "loss": 1.0319,
-       "step": 10700
-     },
-     {
-       "epoch": 0.79,
-       "learning_rate": 4.8886579572446546e-05,
-       "loss": 1.0178,
-       "step": 10800
-     },
-     {
-       "epoch": 0.8,
-       "learning_rate": 4.85896674584323e-05,
-       "loss": 1.0301,
-       "step": 10900
-     },
-     {
-       "epoch": 0.81,
-       "learning_rate": 4.8292755344418046e-05,
-       "loss": 1.0375,
-       "step": 11000
-     },
-     {
-       "epoch": 0.81,
-       "eval_loss": 0.2285824865102768,
-       "eval_runtime": 979.8277,
-       "eval_samples_per_second": 16.337,
-       "eval_steps_per_second": 2.042,
-       "eval_wer": 0.23898585163334427,
-       "step": 11000
-     },
-     {
-       "epoch": 0.81,
-       "learning_rate": 4.79958432304038e-05,
-       "loss": 1.0398,
-       "step": 11100
-     },
-     {
-       "epoch": 0.82,
-       "learning_rate": 4.769893111638954e-05,
-       "loss": 1.0308,
-       "step": 11200
-     },
-     {
-       "epoch": 0.83,
-       "learning_rate": 4.7402019002375294e-05,
-       "loss": 1.0309,
-       "step": 11300
-     },
-     {
-       "epoch": 0.84,
-       "learning_rate": 4.710510688836104e-05,
-       "loss": 1.0287,
-       "step": 11400
-     },
-     {
-       "epoch": 0.84,
-       "learning_rate": 4.6808194774346794e-05,
-       "loss": 1.0195,
-       "step": 11500
-     },
-     {
-       "epoch": 0.85,
801
- "learning_rate": 4.651128266033254e-05,
802
- "loss": 1.0292,
803
- "step": 11600
804
- },
805
- {
806
- "epoch": 0.86,
807
- "learning_rate": 4.621437054631828e-05,
808
- "loss": 1.0147,
809
- "step": 11700
810
- },
811
- {
812
- "epoch": 0.87,
813
- "learning_rate": 4.5917458432304034e-05,
814
- "loss": 1.0242,
815
- "step": 11800
816
- },
817
- {
818
- "epoch": 0.87,
819
- "learning_rate": 4.562054631828978e-05,
820
- "loss": 1.029,
821
- "step": 11900
822
- },
823
- {
824
- "epoch": 0.88,
825
- "learning_rate": 4.5326603325415675e-05,
826
- "loss": 1.0193,
827
- "step": 12000
828
- },
829
- {
830
- "epoch": 0.88,
831
- "eval_loss": 0.22122837603092194,
832
- "eval_runtime": 981.4673,
833
- "eval_samples_per_second": 16.309,
834
- "eval_steps_per_second": 2.039,
835
- "eval_wer": 0.23376215448486834,
836
- "step": 12000
837
- },
838
- {
839
- "epoch": 0.89,
840
- "learning_rate": 4.502969121140143e-05,
841
- "loss": 1.0249,
842
- "step": 12100
843
- },
844
- {
845
- "epoch": 0.9,
846
- "learning_rate": 4.473277909738717e-05,
847
- "loss": 1.0165,
848
- "step": 12200
849
- },
850
- {
851
- "epoch": 0.9,
852
- "learning_rate": 4.4435866983372915e-05,
853
- "loss": 1.0303,
854
- "step": 12300
855
- },
856
- {
857
- "epoch": 0.91,
858
- "learning_rate": 4.413895486935867e-05,
859
- "loss": 1.0295,
860
- "step": 12400
861
- },
862
- {
863
- "epoch": 0.92,
864
- "learning_rate": 4.3842042755344415e-05,
865
- "loss": 1.0112,
866
- "step": 12500
867
- },
868
- {
869
- "epoch": 0.92,
870
- "learning_rate": 4.35480997624703e-05,
871
- "loss": 1.0056,
872
- "step": 12600
873
- },
874
- {
875
- "epoch": 0.93,
876
- "learning_rate": 4.325118764845605e-05,
877
- "loss": 1.0108,
878
- "step": 12700
879
- },
880
- {
881
- "epoch": 0.94,
882
- "learning_rate": 4.29542755344418e-05,
883
- "loss": 1.0133,
884
- "step": 12800
885
- },
886
- {
887
- "epoch": 0.95,
888
- "learning_rate": 4.265736342042755e-05,
889
- "loss": 1.0063,
890
- "step": 12900
891
- },
892
- {
893
- "epoch": 0.95,
894
- "learning_rate": 4.23604513064133e-05,
895
- "loss": 1.0077,
896
- "step": 13000
897
- },
898
- {
899
- "epoch": 0.95,
900
- "eval_loss": 0.21520280838012695,
901
- "eval_runtime": 983.9086,
902
- "eval_samples_per_second": 16.269,
903
- "eval_steps_per_second": 2.034,
904
- "eval_wer": 0.22689282202556538,
905
- "step": 13000
906
- },
907
- {
908
- "epoch": 0.96,
909
- "learning_rate": 4.206353919239904e-05,
910
- "loss": 1.0085,
911
- "step": 13100
912
- },
913
- {
914
- "epoch": 0.97,
915
- "learning_rate": 4.176662707838479e-05,
916
- "loss": 1.011,
917
- "step": 13200
918
- },
919
- {
920
- "epoch": 0.98,
921
- "learning_rate": 4.146971496437054e-05,
922
- "loss": 1.0131,
923
- "step": 13300
924
- },
925
- {
926
- "epoch": 0.98,
927
- "learning_rate": 4.117280285035629e-05,
928
- "loss": 0.998,
929
- "step": 13400
930
- },
931
- {
932
- "epoch": 0.99,
933
- "learning_rate": 4.0875890736342043e-05,
934
- "loss": 1.0002,
935
- "step": 13500
936
- },
937
- {
938
- "epoch": 1.0,
939
- "learning_rate": 4.0578978622327783e-05,
940
- "loss": 0.9916,
941
- "step": 13600
942
- },
943
- {
944
- "epoch": 1.01,
945
- "learning_rate": 4.028206650831354e-05,
946
- "loss": 0.9662,
947
- "step": 13700
948
- },
949
- {
950
- "epoch": 1.01,
951
- "learning_rate": 3.9985154394299284e-05,
952
- "loss": 0.9758,
953
- "step": 13800
954
- },
955
- {
956
- "epoch": 1.02,
957
- "learning_rate": 3.968824228028504e-05,
958
- "loss": 1.013,
959
- "step": 13900
960
- },
961
- {
962
- "epoch": 1.03,
963
- "learning_rate": 3.939133016627078e-05,
964
- "loss": 1.0004,
965
- "step": 14000
966
- },
967
- {
968
- "epoch": 1.03,
969
- "eval_loss": 0.2093251347541809,
970
- "eval_runtime": 986.9604,
971
- "eval_samples_per_second": 16.218,
972
- "eval_steps_per_second": 2.027,
973
- "eval_wer": 0.22069949743253578,
974
- "step": 14000
975
- },
976
- {
977
- "epoch": 1.03,
978
- "learning_rate": 3.9094418052256524e-05,
979
- "loss": 0.9852,
980
- "step": 14100
981
- },
982
- {
983
- "epoch": 1.04,
984
- "learning_rate": 3.879750593824228e-05,
985
- "loss": 0.9765,
986
- "step": 14200
987
- },
988
- {
989
- "epoch": 1.05,
990
- "learning_rate": 3.8500593824228025e-05,
991
- "loss": 0.9978,
992
- "step": 14300
993
- },
994
- {
995
- "epoch": 1.06,
996
- "learning_rate": 3.820368171021378e-05,
997
- "loss": 0.9807,
998
- "step": 14400
999
- },
1000
- {
1001
- "epoch": 1.06,
1002
- "learning_rate": 3.790676959619952e-05,
1003
- "loss": 0.9988,
1004
- "step": 14500
1005
- },
1006
- {
1007
- "epoch": 1.07,
1008
- "learning_rate": 3.7609857482185265e-05,
1009
- "loss": 0.977,
1010
- "step": 14600
1011
- },
1012
- {
1013
- "epoch": 1.08,
1014
- "learning_rate": 3.731294536817102e-05,
1015
- "loss": 0.9735,
1016
- "step": 14700
1017
- },
1018
- {
1019
- "epoch": 1.09,
1020
- "learning_rate": 3.7016033254156765e-05,
1021
- "loss": 0.9767,
1022
- "step": 14800
1023
- },
1024
- {
1025
- "epoch": 1.09,
1026
- "learning_rate": 3.671912114014251e-05,
1027
- "loss": 0.9555,
1028
- "step": 14900
1029
- },
1030
- {
1031
- "epoch": 1.1,
1032
- "learning_rate": 3.6422209026128266e-05,
1033
- "loss": 0.9649,
1034
- "step": 15000
1035
- },
1036
- {
1037
- "epoch": 1.1,
1038
- "eval_loss": 0.19932541251182556,
1039
- "eval_runtime": 986.5773,
1040
- "eval_samples_per_second": 16.225,
1041
- "eval_steps_per_second": 2.028,
1042
- "eval_wer": 0.21130367092756475,
1043
- "step": 15000
1044
- },
1045
- {
1046
- "epoch": 1.11,
1047
- "learning_rate": 3.612529691211401e-05,
1048
- "loss": 0.9608,
1049
- "step": 15100
1050
- },
1051
- {
1052
- "epoch": 1.12,
1053
- "learning_rate": 3.582838479809976e-05,
1054
- "loss": 0.9549,
1055
- "step": 15200
1056
- },
1057
- {
1058
- "epoch": 1.12,
1059
- "learning_rate": 3.5531472684085506e-05,
1060
- "loss": 0.9636,
1061
- "step": 15300
1062
- },
1063
- {
1064
- "epoch": 1.13,
1065
- "learning_rate": 3.523456057007125e-05,
1066
- "loss": 0.9605,
1067
- "step": 15400
1068
- },
1069
- {
1070
- "epoch": 1.14,
1071
- "learning_rate": 3.4937648456057006e-05,
1072
- "loss": 0.962,
1073
- "step": 15500
1074
- },
1075
- {
1076
- "epoch": 1.14,
1077
- "learning_rate": 3.464073634204275e-05,
1078
- "loss": 0.9565,
1079
- "step": 15600
1080
- },
1081
- {
1082
- "epoch": 1.15,
1083
- "learning_rate": 3.43438242280285e-05,
1084
- "loss": 0.9609,
1085
- "step": 15700
1086
- },
1087
- {
1088
- "epoch": 1.16,
1089
- "learning_rate": 3.404691211401425e-05,
1090
- "loss": 0.9552,
1091
- "step": 15800
1092
- },
1093
- {
1094
- "epoch": 1.17,
1095
- "learning_rate": 3.375e-05,
1096
- "loss": 0.9503,
1097
- "step": 15900
1098
- },
1099
- {
1100
- "epoch": 1.17,
1101
- "learning_rate": 3.345308788598574e-05,
1102
- "loss": 0.9509,
1103
- "step": 16000
1104
- },
1105
- {
1106
- "epoch": 1.17,
1107
- "eval_loss": 0.19342663884162903,
1108
- "eval_runtime": 984.1094,
1109
- "eval_samples_per_second": 16.265,
1110
- "eval_steps_per_second": 2.033,
1111
- "eval_wer": 0.20888643067846607,
1112
- "step": 16000
1113
- },
1114
- {
1115
- "epoch": 1.18,
1116
- "learning_rate": 3.3156175771971494e-05,
1117
- "loss": 0.9369,
1118
- "step": 16100
1119
- },
1120
- {
1121
- "epoch": 1.19,
1122
- "learning_rate": 3.285926365795724e-05,
1123
- "loss": 0.9549,
1124
- "step": 16200
1125
- },
1126
- {
1127
- "epoch": 1.2,
1128
- "learning_rate": 3.256235154394299e-05,
1129
- "loss": 0.9503,
1130
- "step": 16300
1131
- },
1132
- {
1133
- "epoch": 1.2,
1134
- "learning_rate": 3.226543942992874e-05,
1135
- "loss": 0.9553,
1136
- "step": 16400
1137
- },
1138
- {
1139
- "epoch": 1.21,
1140
- "learning_rate": 3.196852731591449e-05,
1141
- "loss": 0.9508,
1142
- "step": 16500
1143
- },
1144
- {
1145
- "epoch": 1.22,
1146
- "learning_rate": 3.1671615201900235e-05,
1147
- "loss": 0.9411,
1148
- "step": 16600
1149
- },
1150
- {
1151
- "epoch": 1.23,
1152
- "learning_rate": 3.137470308788598e-05,
1153
- "loss": 0.9435,
1154
- "step": 16700
1155
- },
1156
- {
1157
- "epoch": 1.23,
1158
- "learning_rate": 3.107779097387173e-05,
1159
- "loss": 0.9439,
1160
- "step": 16800
1161
- },
1162
- {
1163
- "epoch": 1.24,
1164
- "learning_rate": 3.078087885985748e-05,
1165
- "loss": 0.946,
1166
- "step": 16900
1167
- },
1168
- {
1169
- "epoch": 1.25,
1170
- "learning_rate": 3.048396674584323e-05,
1171
- "loss": 0.9533,
1172
- "step": 17000
1173
- },
1174
- {
1175
- "epoch": 1.25,
1176
- "eval_loss": 0.18736572563648224,
1177
- "eval_runtime": 984.7341,
1178
- "eval_samples_per_second": 16.255,
1179
- "eval_steps_per_second": 2.032,
1180
- "eval_wer": 0.20231071779744347,
1181
- "step": 17000
1182
- },
1183
- {
1184
- "epoch": 1.25,
1185
- "learning_rate": 3.018705463182898e-05,
1186
- "loss": 0.9322,
1187
- "step": 17100
1188
- },
1189
- {
1190
- "epoch": 1.26,
1191
- "learning_rate": 2.9890142517814722e-05,
1192
- "loss": 0.94,
1193
- "step": 17200
1194
- },
1195
- {
1196
- "epoch": 1.27,
1197
- "learning_rate": 2.9593230403800473e-05,
1198
- "loss": 0.9373,
1199
- "step": 17300
1200
- },
1201
- {
1202
- "epoch": 1.28,
1203
- "learning_rate": 2.9299287410926363e-05,
1204
- "loss": 0.924,
1205
- "step": 17400
1206
- },
1207
- {
1208
- "epoch": 1.28,
1209
- "learning_rate": 2.9005344418052253e-05,
1210
- "loss": 0.9357,
1211
- "step": 17500
1212
- },
1213
- {
1214
- "epoch": 1.29,
1215
- "learning_rate": 2.8708432304038003e-05,
1216
- "loss": 0.9351,
1217
- "step": 17600
1218
- },
1219
- {
1220
- "epoch": 1.3,
1221
- "learning_rate": 2.841152019002375e-05,
1222
- "loss": 0.9371,
1223
- "step": 17700
1224
- },
1225
- {
1226
- "epoch": 1.31,
1227
- "learning_rate": 2.81146080760095e-05,
1228
- "loss": 0.9253,
1229
- "step": 17800
1230
- },
1231
- {
1232
- "epoch": 1.31,
1233
- "learning_rate": 2.7817695961995246e-05,
1234
- "loss": 0.9264,
1235
- "step": 17900
1236
- },
1237
- {
1238
- "epoch": 1.32,
1239
- "learning_rate": 2.7520783847980997e-05,
1240
- "loss": 0.9248,
1241
- "step": 18000
1242
- },
1243
- {
1244
- "epoch": 1.32,
1245
- "eval_loss": 0.1818237155675888,
1246
- "eval_runtime": 1114.2718,
1247
- "eval_samples_per_second": 14.365,
1248
- "eval_steps_per_second": 1.796,
1249
- "eval_wer": 0.19742843876324703,
1250
- "step": 18000
1251
- },
1252
- {
1253
- "epoch": 1.33,
1254
- "learning_rate": 2.722387173396674e-05,
1255
- "loss": 0.9448,
1256
- "step": 18100
1257
- },
1258
- {
1259
- "epoch": 1.34,
1260
- "learning_rate": 2.692695961995249e-05,
1261
- "loss": 0.9284,
1262
- "step": 18200
1263
- },
1264
- {
1265
- "epoch": 1.34,
1266
- "learning_rate": 2.663004750593824e-05,
1267
- "loss": 0.9141,
1268
- "step": 18300
1269
- },
1270
- {
1271
- "epoch": 1.35,
1272
- "learning_rate": 2.6333135391923987e-05,
1273
- "loss": 0.9117,
1274
- "step": 18400
1275
- },
1276
- {
1277
- "epoch": 1.36,
1278
- "learning_rate": 2.6036223277909737e-05,
1279
- "loss": 0.917,
1280
- "step": 18500
1281
- },
1282
- {
1283
- "epoch": 1.36,
1284
- "learning_rate": 2.5739311163895484e-05,
1285
- "loss": 0.9165,
1286
- "step": 18600
1287
- },
1288
- {
1289
- "epoch": 1.37,
1290
- "learning_rate": 2.5442399049881234e-05,
1291
- "loss": 0.9099,
1292
- "step": 18700
1293
- },
1294
- {
1295
- "epoch": 1.38,
1296
- "learning_rate": 2.5145486935866978e-05,
1297
- "loss": 0.9022,
1298
- "step": 18800
1299
- },
1300
- {
1301
- "epoch": 1.39,
1302
- "learning_rate": 2.4848574821852728e-05,
1303
- "loss": 0.9246,
1304
- "step": 18900
1305
- },
1306
- {
1307
- "epoch": 1.39,
1308
- "learning_rate": 2.4551662707838478e-05,
1309
- "loss": 0.9216,
1310
- "step": 19000
1311
- },
1312
- {
1313
- "epoch": 1.39,
1314
- "eval_loss": 0.17756715416908264,
1315
- "eval_runtime": 1032.2412,
1316
- "eval_samples_per_second": 15.507,
1317
- "eval_steps_per_second": 1.939,
1318
- "eval_wer": 0.19256664481590735,
1319
- "step": 19000
1320
- },
1321
- {
1322
- "epoch": 1.4,
1323
- "learning_rate": 2.4254750593824225e-05,
1324
- "loss": 0.9142,
1325
- "step": 19100
1326
- },
1327
- {
1328
- "epoch": 1.41,
1329
- "learning_rate": 2.3957838479809975e-05,
1330
- "loss": 0.9275,
1331
- "step": 19200
1332
- },
1333
- {
1334
- "epoch": 1.42,
1335
- "learning_rate": 2.3660926365795722e-05,
1336
- "loss": 0.9132,
1337
- "step": 19300
1338
- },
1339
- {
1340
- "epoch": 1.42,
1341
- "learning_rate": 2.3364014251781472e-05,
1342
- "loss": 0.9111,
1343
- "step": 19400
1344
- },
1345
- {
1346
- "epoch": 1.43,
1347
- "learning_rate": 2.3067102137767216e-05,
1348
- "loss": 0.8974,
1349
- "step": 19500
1350
- },
1351
- {
1352
- "epoch": 1.44,
1353
- "learning_rate": 2.2770190023752966e-05,
1354
- "loss": 0.9013,
1355
- "step": 19600
1356
- },
1357
- {
1358
- "epoch": 1.45,
1359
- "learning_rate": 2.2473277909738716e-05,
1360
- "loss": 0.9093,
1361
- "step": 19700
1362
- },
1363
- {
1364
- "epoch": 1.45,
1365
- "learning_rate": 2.2176365795724463e-05,
1366
- "loss": 0.8926,
1367
- "step": 19800
1368
- },
1369
- {
1370
- "epoch": 1.46,
1371
- "learning_rate": 2.1879453681710213e-05,
1372
- "loss": 0.9026,
1373
- "step": 19900
1374
- },
1375
- {
1376
- "epoch": 1.47,
1377
- "learning_rate": 2.158254156769596e-05,
1378
- "loss": 0.8964,
1379
- "step": 20000
1380
- },
1381
- {
1382
- "epoch": 1.47,
1383
- "eval_loss": 0.1722368746995926,
1384
- "eval_runtime": 1019.2936,
1385
- "eval_samples_per_second": 15.704,
1386
- "eval_steps_per_second": 1.963,
1387
- "eval_wer": 0.19043619578280346,
1388
- "step": 20000
1389
- },
1390
- {
1391
- "epoch": 1.47,
1392
- "learning_rate": 2.128859857482185e-05,
1393
- "loss": 0.8906,
1394
- "step": 20100
1395
- },
1396
- {
1397
- "epoch": 1.48,
1398
- "learning_rate": 2.09916864608076e-05,
1399
- "loss": 0.8878,
1400
- "step": 20200
1401
- },
1402
- {
1403
- "epoch": 1.49,
1404
- "learning_rate": 2.0694774346793347e-05,
1405
- "loss": 0.9024,
1406
- "step": 20300
1407
- },
1408
- {
1409
- "epoch": 1.5,
1410
- "learning_rate": 2.0397862232779097e-05,
1411
- "loss": 0.8903,
1412
- "step": 20400
1413
- },
1414
- {
1415
- "epoch": 1.5,
1416
- "learning_rate": 2.0100950118764844e-05,
1417
- "loss": 0.8843,
1418
- "step": 20500
1419
- },
1420
- {
1421
- "epoch": 1.51,
1422
- "learning_rate": 1.9804038004750594e-05,
1423
- "loss": 0.8911,
1424
- "step": 20600
1425
- },
1426
- {
1427
- "epoch": 1.52,
1428
- "learning_rate": 1.9507125890736337e-05,
1429
- "loss": 0.8795,
1430
- "step": 20700
1431
- },
1432
- {
1433
- "epoch": 1.53,
1434
- "learning_rate": 1.9210213776722087e-05,
1435
- "loss": 0.8777,
1436
- "step": 20800
1437
- },
1438
- {
1439
- "epoch": 1.53,
1440
- "learning_rate": 1.8913301662707838e-05,
1441
- "loss": 0.889,
1442
- "step": 20900
1443
- },
1444
- {
1445
- "epoch": 1.54,
1446
- "learning_rate": 1.8616389548693584e-05,
1447
- "loss": 0.8941,
1448
- "step": 21000
1449
- },
1450
- {
1451
- "epoch": 1.54,
1452
- "eval_loss": 0.16895848512649536,
1453
- "eval_runtime": 1022.9987,
1454
- "eval_samples_per_second": 15.647,
1455
- "eval_steps_per_second": 1.956,
1456
- "eval_wer": 0.18521932699661314,
1457
- "step": 21000
1458
- },
1459
- {
1460
- "epoch": 1.55,
1461
- "learning_rate": 1.831947743467933e-05,
1462
- "loss": 0.882,
1463
- "step": 21100
1464
- },
1465
- {
1466
- "epoch": 1.56,
1467
- "learning_rate": 1.802256532066508e-05,
1468
- "loss": 0.8801,
1469
- "step": 21200
1470
- },
1471
- {
1472
- "epoch": 1.56,
1473
- "learning_rate": 1.772565320665083e-05,
1474
- "loss": 0.8718,
1475
- "step": 21300
1476
- },
1477
- {
1478
- "epoch": 1.57,
1479
- "learning_rate": 1.742874109263658e-05,
1480
- "loss": 0.8904,
1481
- "step": 21400
1482
- },
1483
- {
1484
- "epoch": 1.58,
1485
- "learning_rate": 1.7131828978622325e-05,
1486
- "loss": 0.8729,
1487
- "step": 21500
1488
- },
1489
- {
1490
- "epoch": 1.58,
1491
- "learning_rate": 1.6834916864608075e-05,
1492
- "loss": 0.8722,
1493
- "step": 21600
1494
- },
1495
- {
1496
- "epoch": 1.59,
1497
- "learning_rate": 1.6538004750593822e-05,
1498
- "loss": 0.8739,
1499
- "step": 21700
1500
- },
1501
- {
1502
- "epoch": 1.6,
1503
- "learning_rate": 1.624109263657957e-05,
1504
- "loss": 0.8635,
1505
- "step": 21800
1506
- },
1507
- {
1508
- "epoch": 1.61,
1509
- "learning_rate": 1.594418052256532e-05,
1510
- "loss": 0.8767,
1511
- "step": 21900
1512
- },
1513
- {
1514
- "epoch": 1.61,
1515
- "learning_rate": 1.564726840855107e-05,
1516
- "loss": 0.871,
1517
- "step": 22000
1518
- },
1519
- {
1520
- "epoch": 1.61,
1521
- "eval_loss": 0.16269078850746155,
1522
- "eval_runtime": 1042.6643,
1523
- "eval_samples_per_second": 15.352,
1524
- "eval_steps_per_second": 1.919,
1525
- "eval_wer": 0.17805637495902982,
1526
- "step": 22000
1527
- },
1528
- {
1529
- "epoch": 1.62,
1530
- "learning_rate": 1.5350356294536816e-05,
1531
- "loss": 0.8663,
1532
- "step": 22100
1533
- },
1534
- {
1535
- "epoch": 1.63,
1536
- "learning_rate": 1.5056413301662706e-05,
1537
- "loss": 0.8732,
1538
- "step": 22200
1539
- },
1540
- {
1541
- "epoch": 1.64,
1542
- "learning_rate": 1.4759501187648455e-05,
1543
- "loss": 0.8625,
1544
- "step": 22300
1545
- },
1546
- {
1547
- "epoch": 1.64,
1548
- "learning_rate": 1.4462589073634203e-05,
1549
- "loss": 0.854,
1550
- "step": 22400
1551
- },
1552
- {
1553
- "epoch": 1.65,
1554
- "learning_rate": 1.416567695961995e-05,
1555
- "loss": 0.8692,
1556
- "step": 22500
1557
- },
1558
- {
1559
- "epoch": 1.66,
1560
- "learning_rate": 1.38687648456057e-05,
1561
- "loss": 0.8477,
1562
- "step": 22600
1563
- },
1564
- {
1565
- "epoch": 1.67,
1566
- "learning_rate": 1.3571852731591449e-05,
1567
- "loss": 0.8494,
1568
- "step": 22700
1569
- },
1570
- {
1571
- "epoch": 1.67,
1572
- "learning_rate": 1.3277909738717339e-05,
1573
- "loss": 0.8599,
1574
- "step": 22800
1575
- },
1576
- {
1577
- "epoch": 1.68,
1578
- "learning_rate": 1.2980997624703087e-05,
1579
- "loss": 0.863,
1580
- "step": 22900
1581
- },
1582
- {
1583
- "epoch": 1.69,
1584
- "learning_rate": 1.2684085510688834e-05,
1585
- "loss": 0.847,
1586
- "step": 23000
1587
- },
1588
- {
1589
- "epoch": 1.69,
1590
- "eval_loss": 0.15907420217990875,
1591
- "eval_runtime": 1036.4519,
1592
- "eval_samples_per_second": 15.444,
1593
- "eval_steps_per_second": 1.931,
1594
- "eval_wer": 0.17514066426308314,
1595
- "step": 23000
1596
- },
1597
- {
1598
- "epoch": 1.69,
1599
- "learning_rate": 1.2387173396674582e-05,
1600
- "loss": 0.8487,
1601
- "step": 23100
1602
- },
1603
- {
1604
- "epoch": 1.7,
1605
- "learning_rate": 1.2090261282660333e-05,
1606
- "loss": 0.8637,
1607
- "step": 23200
1608
- },
1609
- {
1610
- "epoch": 1.71,
1611
- "learning_rate": 1.1793349168646081e-05,
1612
- "loss": 0.8456,
1613
- "step": 23300
1614
- },
1615
- {
1616
- "epoch": 1.72,
1617
- "learning_rate": 1.1496437054631828e-05,
1618
- "loss": 0.8518,
1619
- "step": 23400
1620
- },
1621
- {
1622
- "epoch": 1.72,
1623
- "learning_rate": 1.1199524940617576e-05,
1624
- "loss": 0.8456,
1625
- "step": 23500
1626
- },
1627
- {
1628
- "epoch": 1.73,
1629
- "learning_rate": 1.0902612826603325e-05,
1630
- "loss": 0.8349,
1631
- "step": 23600
1632
- },
1633
- {
1634
- "epoch": 1.74,
1635
- "learning_rate": 1.0605700712589072e-05,
1636
- "loss": 0.8426,
1637
- "step": 23700
1638
- },
1639
- {
1640
- "epoch": 1.75,
1641
- "learning_rate": 1.030878859857482e-05,
1642
- "loss": 0.8503,
1643
- "step": 23800
1644
- },
1645
- {
1646
- "epoch": 1.75,
1647
- "learning_rate": 1.001187648456057e-05,
1648
- "loss": 0.844,
1649
- "step": 23900
1650
- },
1651
- {
1652
- "epoch": 1.76,
1653
- "learning_rate": 9.714964370546319e-06,
1654
- "loss": 0.822,
1655
- "step": 24000
1656
- },
1657
- {
1658
- "epoch": 1.76,
1659
- "eval_loss": 0.1550702005624771,
1660
- "eval_runtime": 1027.8442,
1661
- "eval_samples_per_second": 15.573,
1662
- "eval_steps_per_second": 1.947,
1663
- "eval_wer": 0.17010133289631815,
1664
- "step": 24000
1665
- },
1666
- {
1667
- "epoch": 1.77,
1668
- "learning_rate": 9.418052256532066e-06,
1669
- "loss": 0.8452,
1670
- "step": 24100
1671
- },
1672
- {
1673
- "epoch": 1.78,
1674
- "learning_rate": 9.121140142517814e-06,
1675
- "loss": 0.843,
1676
- "step": 24200
1677
- },
1678
- {
1679
- "epoch": 1.78,
1680
- "learning_rate": 8.824228028503563e-06,
1681
- "loss": 0.8429,
1682
- "step": 24300
1683
- },
1684
- {
1685
- "epoch": 1.79,
1686
- "learning_rate": 8.527315914489311e-06,
1687
- "loss": 0.8513,
1688
- "step": 24400
1689
- },
1690
- {
1691
- "epoch": 1.8,
1692
- "learning_rate": 8.23040380047506e-06,
1693
- "loss": 0.834,
1694
- "step": 24500
1695
- },
1696
- {
1697
- "epoch": 1.8,
1698
- "learning_rate": 7.933491686460806e-06,
1699
- "loss": 0.8383,
1700
- "step": 24600
1701
- },
1702
- {
1703
- "epoch": 1.81,
1704
- "learning_rate": 7.636579572446555e-06,
1705
- "loss": 0.8294,
1706
- "step": 24700
1707
- },
1708
- {
1709
- "epoch": 1.82,
1710
- "learning_rate": 7.339667458432303e-06,
1711
- "loss": 0.8335,
1712
- "step": 24800
1713
- },
1714
- {
1715
- "epoch": 1.83,
1716
- "learning_rate": 7.042755344418052e-06,
1717
- "loss": 0.8207,
1718
- "step": 24900
1719
- },
1720
- {
1721
- "epoch": 1.83,
1722
- "learning_rate": 6.745843230403799e-06,
1723
- "loss": 0.8188,
1724
- "step": 25000
1725
- },
1726
- {
1727
- "epoch": 1.83,
1728
- "eval_loss": 0.1527515947818756,
1729
- "eval_runtime": 1034.5359,
1730
- "eval_samples_per_second": 15.473,
1731
- "eval_steps_per_second": 1.934,
1732
- "eval_wer": 0.16672812192723696,
1733
- "step": 25000
1734
- },
1735
- {
1736
- "epoch": 1.84,
1737
- "learning_rate": 6.448931116389549e-06,
1738
- "loss": 0.8289,
1739
- "step": 25100
1740
- },
1741
- {
1742
- "epoch": 1.85,
1743
- "learning_rate": 6.152019002375296e-06,
1744
- "loss": 0.8306,
1745
- "step": 25200
1746
- },
1747
- {
1748
- "epoch": 1.86,
1749
- "learning_rate": 5.855106888361045e-06,
1750
- "loss": 0.8335,
1751
- "step": 25300
1752
- },
1753
- {
1754
- "epoch": 1.86,
1755
- "learning_rate": 5.5581947743467925e-06,
1756
- "loss": 0.8291,
1757
- "step": 25400
1758
- },
1759
- {
1760
- "epoch": 1.87,
1761
- "learning_rate": 5.261282660332541e-06,
1762
- "loss": 0.8206,
1763
- "step": 25500
1764
- },
1765
- {
1766
- "epoch": 1.88,
1767
- "learning_rate": 4.9643705463182895e-06,
1768
- "loss": 0.8242,
1769
- "step": 25600
1770
- },
1771
- {
1772
- "epoch": 1.89,
1773
- "learning_rate": 4.667458432304038e-06,
1774
- "loss": 0.8189,
1775
- "step": 25700
1776
- },
1777
- {
1778
- "epoch": 1.89,
1779
- "learning_rate": 4.370546318289786e-06,
1780
- "loss": 0.8275,
1781
- "step": 25800
1782
- },
1783
- {
1784
- "epoch": 1.9,
1785
- "learning_rate": 4.073634204275534e-06,
1786
- "loss": 0.8142,
1787
- "step": 25900
1788
- },
1789
- {
1790
- "epoch": 1.91,
1791
- "learning_rate": 3.776722090261282e-06,
1792
- "loss": 0.8305,
1793
- "step": 26000
1794
- },
1795
- {
1796
- "epoch": 1.91,
1797
- "eval_loss": 0.14921718835830688,
1798
- "eval_runtime": 1026.6478,
1799
- "eval_samples_per_second": 15.592,
1800
- "eval_steps_per_second": 1.949,
1801
- "eval_wer": 0.16312957500273134,
1802
- "step": 26000
1803
- },
1804
- {
1805
- "epoch": 1.91,
1806
- "learning_rate": 3.4798099762470307e-06,
1807
- "loss": 0.833,
1808
- "step": 26100
1809
- },
1810
- {
1811
- "epoch": 1.92,
1812
- "learning_rate": 3.1828978622327788e-06,
1813
- "loss": 0.8175,
1814
- "step": 26200
1815
- },
1816
- {
1817
- "epoch": 1.93,
1818
- "learning_rate": 2.888954869358669e-06,
1819
- "loss": 0.8259,
1820
- "step": 26300
1821
- },
1822
- {
1823
- "epoch": 1.94,
1824
- "learning_rate": 2.5920427553444177e-06,
1825
- "loss": 0.8262,
1826
- "step": 26400
1827
- },
1828
- {
1829
- "epoch": 1.94,
1830
- "learning_rate": 2.295130641330166e-06,
1831
- "loss": 0.8223,
1832
- "step": 26500
1833
- },
1834
- {
1835
- "epoch": 1.95,
1836
- "learning_rate": 1.9982185273159142e-06,
1837
- "loss": 0.8285,
1838
- "step": 26600
1839
- },
1840
- {
1841
- "epoch": 1.96,
1842
- "learning_rate": 1.7013064133016625e-06,
1843
- "loss": 0.8226,
1844
- "step": 26700
1845
- },
1846
- {
1847
- "epoch": 1.97,
1848
- "learning_rate": 1.404394299287411e-06,
1849
- "loss": 0.8154,
1850
- "step": 26800
1851
- },
1852
- {
1853
- "epoch": 1.97,
1854
- "learning_rate": 1.107482185273159e-06,
1855
- "loss": 0.8176,
1856
- "step": 26900
1857
- },
1858
- {
1859
- "epoch": 1.98,
1860
- "learning_rate": 8.105700712589074e-07,
1861
- "loss": 0.8122,
1862
- "step": 27000
1863
- },
1864
- {
1865
- "epoch": 1.98,
1866
- "eval_loss": 0.14789555966854095,
1867
- "eval_runtime": 1030.7995,
1868
- "eval_samples_per_second": 15.529,
1869
- "eval_steps_per_second": 1.941,
1870
- "eval_wer": 0.16106740959248333,
1871
- "step": 27000
1872
- },
1873
- {
1874
- "epoch": 1.99,
1875
- "learning_rate": 5.136579572446555e-07,
1876
- "loss": 0.818,
1877
- "step": 27100
1878
- },
1879
- {
1880
- "epoch": 2.0,
1881
- "learning_rate": 2.167458432304038e-07,
1882
- "loss": 0.8284,
1883
- "step": 27200
1884
- },
1885
- {
1886
- "epoch": 2.0,
1887
- "step": 27260,
1888
- "total_flos": 4.0396309180498005e+20,
1889
- "train_loss": 0.32739020716330625,
1890
- "train_runtime": 49115.8494,
1891
- "train_samples_per_second": 17.761,
1892
- "train_steps_per_second": 0.555
1893
- }
1894
- ],
1895
- "max_steps": 27260,
1896
- "num_train_epochs": 2,
1897
- "total_flos": 4.0396309180498005e+20,
1898
- "trial_name": null,
1899
- "trial_params": null
1900
- }
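The removed entries above are evidently the flattened `log_history` of a Trainer state file: over steps 6900–27260 the training loss falls from about 1.09 to 0.82 and the eval WER from 0.266 to 0.161, with an evaluation block every 1000 steps. A minimal sketch for inspecting such a history, assuming a local copy saved as `trainer_state.json` with the standard schema shown above (the file name is an assumption, not part of this commit):

```python
# Minimal sketch: load a Trainer state file and print the eval trajectory.
# Assumes "trainer_state.json" exists locally with a top-level "log_history"
# list like the entries removed above.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_*" fields.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_wer" in e]

for e in eval_logs:
    print(f"step {e['step']:>6}: eval_loss={e['eval_loss']:.4f}, eval_wer={e['eval_wer']:.4f}")
```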
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5702656574d36c90da8eb2dc371da036eaa568fdda0350f1ed96dfd11f67b798
+ oid sha256:3d671fb0f181e146452d1d68a46c3b54df59aa573465bc6cf0a59cb0e02b849a
  size 2991
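training_args.bin is stored through Git LFS, so the diff only swaps the pointer file: a pointer records the blob's SHA-256 ("oid") and byte size, not its contents. A minimal sketch (illustrative, not part of this repo) of how a downloaded file can be checked against such a pointer:

```python
# Minimal sketch: verify a file against a Git LFS pointer by recomputing
# the two values the pointer stores, byte size and SHA-256 digest.
import hashlib
import os

def matches_lfs_pointer(path: str, oid_sha256: str, size: int) -> bool:
    """True if the file at `path` has the size and SHA-256 the pointer claims."""
    if os.path.getsize(path) != size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == oid_sha256

# e.g., for the new pointer above:
# matches_lfs_pointer("training_args.bin",
#                     "3d671fb0f181e146452d1d68a46c3b54df59aa573465bc6cf0a59cb0e02b849a",
#                     2991)
```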
vocab.json CHANGED
@@ -1 +1 @@
- {"=": 1, "@": 2, "[": 3, "]": 4, "_": 5, "`": 6, "a": 7, "b": 8, "c": 9, "d": 10, "e": 11, "f": 12, "g": 13, "h": 14, "i": 15, "j": 16, "k": 17, "l": 18, "m": 19, "n": 20, "o": 21, "p": 22, "q": 23, "r": 24, "s": 25, "t": 26, "u": 27, "v": 28, "w": 29, "x": 30, "y": 31, "z": 32, "¡": 33, "§": 34, "«": 35, "°": 36, "´": 37, "µ": 38, "·": 39, "»": 40, "×": 41, "ß": 42, "à": 43, "á": 44, "â": 45, "ã": 46, "ä": 47, "å": 48, "æ": 49, "ç": 50, "è": 51, "é": 52, "ê": 53, "ë": 54, "ì": 55, "í": 56, "î": 57, "ï": 58, "ð": 59, "ñ": 60, "ò": 61, "ó": 62, "ô": 63, "õ": 64, "ö": 65, "ø": 66, "ù": 67, "ú": 68, "û": 69, "ü": 70, "ý": 71, "þ": 72, "ā": 73, "ă": 74, "ą": 75, "ć": 76, "č": 77, "ď": 78, "đ": 79, "ē": 80, "ė": 81, "ę": 82, "ě": 83, "ğ": 84, "ġ": 85, "ħ": 86, "ī": 87, "ı": 88, "ł": 89, "ń": 90, "ņ": 91, "ň": 92, "ō": 93, "ŏ": 94, "ő": 95, "œ": 96, "ř": 97, "ś": 98, "ş": 99, "š": 100, "ť": 101, "ū": 102, "ů": 103, "ź": 104, "ż": 105, "ž": 106, "ơ": 107, "ǐ": 108, "ǔ": 109, "ș": 110, "ț": 111, "ə": 112, "ʻ": 113, "ʾ": 114, "ʿ": 115, "̆": 116, "̇": 117, "̥": 118, "а": 119, "в": 120, "е": 121, "и": 122, "к": 123, "м": 124, "о": 125, "р": 126, "с": 127, "ф": 128, "ч": 129, "ш": 130, "ѹ": 131, "א": 132, "ב": 133, "נ": 134, "ע": 135, "ש": 136, "་": 137, "ན": 138, "ḫ": 139, "ṟ": 140, "ṣ": 141, "ṭ": 142, "ạ": 143, "ả": 144, "ắ": 145, "ằ": 146, "ế": 147, "ễ": 148, "ệ": 149, "ọ": 150, "ồ": 151, "ộ": 152, "ụ": 153, "ứ": 154, "‑": 155, "‚": 156, "„": 157, "‟": 158, "′": 159, "″": 160, "‹": 161, "›": 162, "→": 163, "−": 164, "≡": 165, "⟨": 166, "⟩": 167, "カ": 168, "东": 169, "临": 170, "乡": 171, "关": 172, "合": 173, "城": 174, "孙": 175, "尣": 176, "幺": 177, "支": 178, "比": 179, "毛": 180, "泽": 181, "無": 182, "生": 183, "臣": 184, "辶": 185, "道": 186, "镇": 187, "黃": 188, "|": 0, "[UNK]": 189, "[PAD]": 190}
+ {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "g": 7, "h": 8, "i": 9, "j": 10, "k": 11, "l": 12, "m": 13, "n": 14, "o": 15, "p": 16, "q": 17, "r": 18, "s": 19, "t": 20, "u": 21, "v": 22, "w": 23, "x": 24, "y": 25, "z": 26, "ß": 27, "à": 28, "á": 29, "â": 30, "ä": 31, "æ": 32, "ç": 33, "é": 34, "í": 35, "î": 36, "ó": 37, "ô": 38, "ö": 39, "ø": 40, "ú": 41, "ü": 42, "þ": 43, "ā": 44, "č": 45, "đ": 46, "ħ": 47, "ī": 48, "ł": 49, "ō": 50, "ő": 51, "œ": 52, "ř": 53, "ś": 54, "ş": 55, "š": 56, "ż": 57, "ž": 58, "ș": 59, "ț": 60, "ə": 61, "̇": 62, "о": 63, "с": 64, "ш": 65, "ѹ": 66, "": 67, "|": 0, "[UNK]": 68, "[PAD]": 69}