Dragon116rus committed on
Commit
762bd03
1 Parent(s): bbada8b

Saving train state of step 1000

Files changed (50)
  1. .gitignore +1 -0
  2. Makefile +9 -0
  3. README.md +563 -0
  4. added_tokens.json +1609 -0
  5. checkpoint-1000-epoch-1/model.safetensors +3 -0
  6. checkpoint-1000-epoch-1/model_1.safetensors +3 -0
  7. checkpoint-1000-epoch-1/optimizer.bin +3 -0
  8. checkpoint-1000-epoch-1/random_states_0.pkl +3 -0
  9. checkpoint-1000-epoch-1/scaler.pt +3 -0
  10. checkpoint-1000-epoch-1/scheduler.bin +3 -0
  11. common_voice_16_1_ru_pseudo_labelled/dataset_dict.json +1 -0
  12. common_voice_16_1_ru_pseudo_labelled/distil-whisper/events.out.tfevents.1712401565.train02.3718604.0 +3 -0
  13. common_voice_16_1_ru_pseudo_labelled/distil-whisper/events.out.tfevents.1712401615.train02.3718686.0 +3 -0
  14. common_voice_16_1_ru_pseudo_labelled/distil-whisper/events.out.tfevents.1712402554.train02.3719243.0 +3 -0
  15. common_voice_16_1_ru_pseudo_labelled/distil-whisper/events.out.tfevents.1712402791.train02.3719643.0 +3 -0
  16. common_voice_16_1_ru_pseudo_labelled/distil-whisper/events.out.tfevents.1712402838.train02.3720027.0 +3 -0
  17. common_voice_16_1_ru_pseudo_labelled/test-transcription.csv +0 -0
  18. common_voice_16_1_ru_pseudo_labelled/test/data-00000-of-00008.arrow +3 -0
  19. common_voice_16_1_ru_pseudo_labelled/test/data-00001-of-00008.arrow +3 -0
  20. common_voice_16_1_ru_pseudo_labelled/test/data-00002-of-00008.arrow +3 -0
  21. common_voice_16_1_ru_pseudo_labelled/test/data-00003-of-00008.arrow +3 -0
  22. common_voice_16_1_ru_pseudo_labelled/test/data-00004-of-00008.arrow +3 -0
  23. common_voice_16_1_ru_pseudo_labelled/test/data-00005-of-00008.arrow +3 -0
  24. common_voice_16_1_ru_pseudo_labelled/test/data-00006-of-00008.arrow +3 -0
  25. common_voice_16_1_ru_pseudo_labelled/test/data-00007-of-00008.arrow +3 -0
  26. common_voice_16_1_ru_pseudo_labelled/test/dataset_info.json +121 -0
  27. common_voice_16_1_ru_pseudo_labelled/test/state.json +34 -0
  28. common_voice_16_1_ru_pseudo_labelled/train-transcription.csv +0 -0
  29. common_voice_16_1_ru_pseudo_labelled/train/data-00000-of-00009.arrow +3 -0
  30. common_voice_16_1_ru_pseudo_labelled/train/data-00001-of-00009.arrow +3 -0
  31. common_voice_16_1_ru_pseudo_labelled/train/data-00002-of-00009.arrow +3 -0
  32. common_voice_16_1_ru_pseudo_labelled/train/data-00003-of-00009.arrow +3 -0
  33. common_voice_16_1_ru_pseudo_labelled/train/data-00004-of-00009.arrow +3 -0
  34. common_voice_16_1_ru_pseudo_labelled/train/data-00005-of-00009.arrow +3 -0
  35. common_voice_16_1_ru_pseudo_labelled/train/data-00006-of-00009.arrow +3 -0
  36. common_voice_16_1_ru_pseudo_labelled/train/data-00007-of-00009.arrow +3 -0
  37. common_voice_16_1_ru_pseudo_labelled/train/data-00008-of-00009.arrow +3 -0
  38. common_voice_16_1_ru_pseudo_labelled/train/dataset_info.json +121 -0
  39. common_voice_16_1_ru_pseudo_labelled/train/state.json +37 -0
  40. common_voice_16_1_ru_pseudo_labelled/validation-transcription.csv +0 -0
  41. common_voice_16_1_ru_pseudo_labelled/validation/data-00000-of-00008.arrow +3 -0
  42. common_voice_16_1_ru_pseudo_labelled/validation/data-00001-of-00008.arrow +3 -0
  43. common_voice_16_1_ru_pseudo_labelled/validation/data-00002-of-00008.arrow +3 -0
  44. common_voice_16_1_ru_pseudo_labelled/validation/data-00003-of-00008.arrow +3 -0
  45. common_voice_16_1_ru_pseudo_labelled/validation/data-00004-of-00008.arrow +3 -0
  46. common_voice_16_1_ru_pseudo_labelled/validation/data-00005-of-00008.arrow +3 -0
  47. common_voice_16_1_ru_pseudo_labelled/validation/data-00006-of-00008.arrow +3 -0
  48. common_voice_16_1_ru_pseudo_labelled/validation/data-00007-of-00008.arrow +3 -0
  49. common_voice_16_1_ru_pseudo_labelled/validation/dataset_info.json +121 -0
  50. common_voice_16_1_ru_pseudo_labelled/validation/state.json +34 -0
.gitignore ADDED
@@ -0,0 +1 @@
1
+ wandb
Makefile ADDED
@@ -0,0 +1,9 @@
1
+ check_dirs := .
2
+
3
+ quality:
4
+ 	black --check $(check_dirs)
5
+ 	ruff $(check_dirs)
6
+
7
+ style:
8
+ 	black $(check_dirs)
9
+ 	ruff $(check_dirs) --fix
README.md ADDED
@@ -0,0 +1,563 @@
1
+ ## Training Distil-Whisper
2
+
3
+ This sub-folder contains all the scripts required to train a Distil-Whisper model in your choice of language. They are
4
+ slightly modified from the original scripts used to distill Whisper for English ASR (as per the [Distil-Whisper paper](https://arxiv.org/abs/2311.00430)).
5
+ The main difference is that these scripts are written in [PyTorch](https://pytorch.org), whereas the original scripts
6
+ are in [JAX](https://jax.readthedocs.io/en/latest/#)/[Flax](https://flax.readthedocs.io/en/latest/). These scripts are
7
+ also made to be easier to run end-to-end, whereas the original scripts require more steps and are somewhat hard-coded
8
+ for English ASR. Both sets of scripts achieve equivalent downstream results when the hyper-parameters are set equal.
9
+
10
+ If you are interested in reproducing the original Distil-Whisper checkpoints, we refer you to the sub-folder [Flax Training](./flax/README.md).
11
+ Otherwise, if you wish to distill Whisper on your own language/dataset, we recommend you use these scripts for ease of use
12
+ and the configurability they provide.
13
+
14
+ Reproducing the Distil-Whisper project requires four stages to be completed in successive order:
15
+
16
+ 1. [Pseudo-labelling](#1-pseudo-labelling)
17
+ 2. [Initialisation](#2-initialisation)
18
+ 3. [Training](#3-training)
19
+ 4. [Evaluation](#4-evaluation)
20
+
21
+ This README is partitioned according to the four stages. Each section provides a minimal example for running the
22
+ scripts used in the project. We will use a running example of distilling the Whisper model for Hindi speech recognition
23
+ on the Common Voice dataset. Note that this dataset only contains ~20 hours of audio data. Thus, it can be run extremely
24
+ quickly, but does not provide sufficient data to achieve optimal performance. We recommend training on upwards of 1000
25
+ hours of data should you want to match the performance of Whisper on high-resource languages.
26
+
27
+ ## Requirements
28
+
29
+ The Distil-Whisper training code is written in [PyTorch](https://pytorch.org) and [Accelerate](https://huggingface.co/docs/accelerate/index).
30
+ It heavily leverages the Whisper implementation in [🤗 Transformers](https://github.com/huggingface/transformers) for both
31
+ training and inference.
32
+
33
+ The instructions for installing the package are as follows:
34
+ 1. Install PyTorch from the [official instructions](https://pytorch.org/get-started/locally/), ensuring you install the correct version for your hardware and CUDA version.
35
+ 2. Fork the `distil-whisper` repository by clicking on the [fork](https://github.com/huggingface/distil-whisper/fork) button on the repository's page.
36
+ 3. Clone the `distil-whisper` repository and add the base repository as a remote. This will allow you to "pull" any upstream changes that are made to the base repository:
37
+
38
+ ```bash
39
+ git clone https://github.com/<your GitHub handle>/distil-whisper.git
40
+ cd distil-whisper
41
+ git remote add upstream https://github.com/huggingface/distil-whisper.git
42
+ ```
43
+ 4. pip install the required packages from the [setup.py](./setup.py) file:
44
+ ```bash
45
+ cd training
46
+ pip install -e .
47
+ cd ../..
48
+ ```
49
+
50
+ 5. Configure Accelerate by running the following command. Note that you should set the number of GPUs you wish to use for distillation, and also the data type (dtype) to your preferred dtype for training/inference (e.g. `bfloat16` on A100 GPUs, `float16` on V100 GPUs, etc.):
51
+
52
+ ```bash
53
+ accelerate config
54
+ ```
55
+
56
+ 6. The last thing we need to do is link our Hugging Face account so that we can pull/push model repositories on the Hub. This will allow us to save our final distilled weights on the Hub so that we can share them with the community. Run the command:
57
+
58
+ ```bash
59
+ git config --global credential.helper store
60
+ huggingface-cli login
61
+ ```
62
+ And then enter an authentication token from https://huggingface.co/settings/tokens. Create a new token if you do not have one already. You should make sure that this token has "write" privileges.
63
+
64
+ To confirm that you have a working environment, first accept the terms of use of the Common Voice 16.1 dataset on the Hub: https://huggingface.co/datasets/mozilla-foundation/common_voice_16_1
65
+
66
+ You can run the following code cell to stream one sample of data from the Common Voice dataset, and check that you can
67
+ perform inference using the "tiny" Whisper model:
68
+
69
+ ```python
70
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
71
+ from datasets import load_dataset, Audio
72
+
73
+ model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny", low_cpu_mem_usage=True)
74
+ processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
75
+
76
+ model.to("cuda")
77
+
78
+ common_voice = load_dataset("mozilla-foundation/common_voice_16_1", "en", split="validation", streaming=True)
79
+ common_voice = common_voice.cast_column("audio", Audio(sampling_rate=processor.feature_extractor.sampling_rate))
80
+
81
+ inputs = processor(next(iter(common_voice))["audio"]["array"], sampling_rate=16000, return_tensors="pt")
82
+ input_features = inputs.input_features
83
+
84
+ generated_ids = model.generate(input_features.to("cuda"), max_new_tokens=128)
85
+ pred_text = processor.decode(generated_ids[0], skip_special_tokens=True)
86
+
87
+ print("Pred text:", pred_text)
88
+ print("Environment set up successful?", generated_ids.shape[-1] == 20)
89
+ ```
90
+
91
+ ## 1. Pseudo-Labelling
92
+
93
+ The python script [`run_pseudo_labelling.py`](run_pseudo_labelling.py) is a flexible inference script that can be used
94
+ to generate pseudo-labels under a range of settings, including using both greedy and beam-search. It is also compatible
95
+ with [🤗 Datasets](https://github.com/huggingface/datasets) *streaming mode*, allowing users to load massive audio
96
+ datasets with **no disk space requirements**. For more information on streaming mode, the reader is referred to the
97
+ blog post: [A Complete Guide to Audio Datasets](https://huggingface.co/blog/audio-datasets#streaming-mode-the-silver-bullet).
98
+
99
+ > As of the latest Distil-Whisper release, [`distil-large-v3`](https://huggingface.co/distil-whisper/distil-large-v3), this
100
+ pseudo-labelling script also performs the added operation of concatenating (or packing) the audio inputs to 30-seconds.
101
+ Not only does this lead to a WER improvement when using the sequential long-form decoding algorithm, but concatenating audios
102
+ to 30-seconds also improves the throughput during training, since the amount of zero-padding on the audio inputs is minimised.
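To make the packing operation concrete, below is a minimal, illustrative sketch of the greedy grouping idea (it is not the actual implementation used by `run_pseudo_labelling.py`); the `durations` list is a hypothetical stand-in for your samples' audio lengths in seconds:

```python
# Illustrative only: greedily group samples so that each group's total duration
# stays at or below 30 seconds (the length of Whisper's input window).
def pack_to_30s(durations, max_duration=30.0):
    groups, current, current_len = [], [], 0.0
    for idx, length in enumerate(durations):
        if current and current_len + length > max_duration:
            groups.append(current)
            current, current_len = [], 0.0
        current.append(idx)
        current_len += length
    if current:
        groups.append(current)
    return groups

print(pack_to_30s([12.0, 11.5, 9.0, 4.0, 28.0]))  # [[0, 1], [2, 3], [4]]
```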
103
+
104
+ The following script demonstrates how to pseudo-label the Hindi split of the Common Voice 16.1 dataset with greedy sampling:
105
+
106
+ ```bash
107
+ #!/usr/bin/env bash
108
+
109
+ accelerate launch run_pseudo_labelling.py \
110
+ --model_name_or_path "openai/whisper-large-v3" \
111
+ --dataset_name "mozilla-foundation/common_voice_16_1" \
112
+ --dataset_config_name "hi" \
113
+ --dataset_split_name "train+validation+test" \
114
+ --text_column_name "sentence" \
115
+ --id_column_name "path" \
116
+ --output_dir "./common_voice_16_1_hi_pseudo_labelled" \
117
+ --wandb_project "distil-whisper-labelling" \
118
+ --per_device_eval_batch_size 64 \
119
+ --dtype "bfloat16" \
120
+ --attn_implementation "sdpa" \
121
+ --logging_steps 500 \
122
+ --max_label_length 256 \
123
+ --concatenate_audio \
124
+ --preprocessing_batch_size 500 \
125
+ --preprocessing_num_workers 8 \
126
+ --dataloader_num_workers 8 \
127
+ --report_to "wandb" \
128
+ --language "hi" \
129
+ --task "transcribe" \
130
+ --return_timestamps \
131
+ --streaming False \
132
+ --generation_num_beams 1 \
133
+ --push_to_hub
134
+ ```
135
+
136
+ On an 80 GB A100 GPU, the above script takes approximately 5 minutes to concatenate and pre-process the 20 hours of
137
+ audio data, and a further 10 minutes to transcribe the pseudo-labels. The pseudo-labelled dataset corresponding to this
138
+ script is available on the Hugging Face Hub under [sanchit-gandhi/common_voice_16_1_hi_pseudo_labelled](https://huggingface.co/datasets/sanchit-gandhi/common_voice_16_1_hi_pseudo_labelled).
139
+ The WER of the pre-trained Whisper large-v3 model is 17.2% on the test split. We will compare the performance of our distilled model against this number.
140
+
141
+ There are three noteworthy arguments that configure the dataset concatenation (or packing) process:
142
+ 1. `concatenate_audio`: whether or not to concatenate (or pack) the audios to 30-second chunks. The latest Distil-Whisper model, [`distil-large-v3`](https://huggingface.co/distil-whisper/distil-large-v3#differences-with-distil-large-v2), highlights the WER improvements obtained using the sequential long-form decoding algorithm when concatenated audios are used. Concatenating audios to 30-seconds also improves the throughput during training, since the amount of zero-padding on the audio inputs is minimised. Hence, it is highly recommended to set `--concatenate_audio=True`.
143
+ 2. `preprocessing_batch_size`: the batch size to use when concatenating (or packing) the audios. Using a larger batch size results in a greater portion of audio samples being packed to 30-seconds, at the expense of higher memory consumption. If you exceed your system's RAM when performing the concatenation operation, reduce the `preprocessing_batch_size` by a factor of 2 to 250 or even 125.
144
+ 3. `preprocessing_num_workers`: the number of multiprocessing workers to use when concatenating the audios. Using more workers will result in faster pre-processing, at the expense of higher memory consumption. Ensure you do not exceed the maximum number of CPUs on your device.
145
+
146
+ In addition, the following arguments configure the inference of the Whisper model:
147
+ 1. `language`: explicitly setting the language token during inference substantially improves the generation performance of the Whisper model, since the model is forced always to predict in the given language. We recommend you set the language to the language you wish to distil the Whisper model on. The only exception is when distilling an English-only model (i.e. where the model id is appended with an `.en`, e.g. `small.en`), the language argument should be set to None, since there is no language token used during training/inference.
148
+ 2. `return_timestamps`: whether or not to predict timestamps in the pseudo-labels. Timestamp prediction is required should you want your distilled model to be able to predict timestamps at inference time (e.g. for the original OpenAI long-form transcription algorithm). However, pseudo-labels generated with timestamps are marginally less accurate than those generated without. We recommend pseudo-labelling **with** timestamps to ensure the distilled model is as general as possible.
149
+ 3. `attn_implementation`: which attention implementation to use for inference. Set to `sdpa` for [PyTorch SDPA](https://huggingface.co/docs/transformers/v4.35.2/en/perf_infer_gpu_one#bettertransformer), or `flash_attn_2` if your hardware supports Flash Attention 2 and you have the [package installed](https://github.com/Dao-AILab/flash-attention).
150
+ 4. `streaming`: whether or not to use Datasets' streaming mode. If enabled, the audio data will be streamed from the Hugging Face Hub with no disk space requirements. However, the user is then responsible for adding the pseudo-labels to the dataset script in a follow-up step (see [Using Streaming Mode](#TODO)). If set to `False`, the audio data will be downloaded and pre-processed offline. At the end of pseudo-labelling, the pseudo-labels will be automatically appended to the original dataset, meaning the dataset is ready to be used for the subsequent training step without any additional steps.
151
+ 5. `generation_num_beams`: how many beams to use while decoding. In practice, we found the distilled model to perform comparably when the data was pseudo-labelled with `generation_num_beams=1` (greedy) or `generation_num_beams>1` (beam). This is likely because the WER filter compensates for the lower quality pseudo-labels obtained using greedy search. However, using `generation_num_beams=1` gives substantially faster inference time for the pseudo-labelling step, and so we recommend this configuration.
152
+
153
+ Should you have your own audio dataset, you can first [convert it](https://huggingface.co/docs/datasets/audio_dataset) to
154
+ Hugging Face Datasets format and push it to the Hugging Face Hub. You can then pseudo-label it using the script above,
155
+ replacing the `--dataset_name` with the name of your dataset on the Hub.
156
+
157
+ Otherwise, you may wish to use an open-source dataset already available on the Hugging Face Hub. We provide a summary of
158
+ the three most popular multilingual datasets in the table below. For more details, refer to the blog post: [A Complete Guide to Audio Datasets](https://huggingface.co/blog/audio-datasets#multilingual-speech-recognition).
159
+
160
+ | Dataset | Languages | Domain | Speaking Style | License | Text Column | ID Column |
161
+ |-----------------------------------------------------------------------------------------------|-----------|---------------------------------------|----------------|-----------|---------------------|--------------|
162
+ | [Multilingual LibriSpeech](https://huggingface.co/datasets/facebook/multilingual_librispeech) | 6 | Audiobooks | Narrated | CC-BY-4.0 | `"text"` | `"id"` |
163
+ | [Common Voice 16](https://huggingface.co/datasets/mozilla-foundation/common_voice_16_1) | 120 | Wikipedia text & crowd-sourced speech | Narrated | CC0-1.0 | `"sentence"` | `"path"` |
164
+ | [VoxPopuli](https://huggingface.co/datasets/facebook/voxpopuli) | 15 | European Parliament recordings | Spontaneous | CC0 | `"normalized_text"` | `"audio_id"` |
165
+
166
+ To achieve *robustness* to different distributions of audio data, it is recommended to train on multiple datasets where possible.
167
+ For example, the above three datasets all have splits for the German language. Thus, if distilling a Whisper model for German,
168
+ it would be wise to use a combination of the three datasets during training, in order to cover at least three distinct domains
169
+ (audiobooks, crowd-sourced speech, parliament recordings). You may wish to use a combination of open-source datasets, or
170
+ a combination of open-source and individually owned datasets to cover multiple distributions and domains.
171
+
172
+ ## 2. Initialisation
173
+
174
+ The script [`create_student_model.py`](create_student_model.py) can be used to initialise a small student model
175
+ from a large teacher model. When initialising a student model with fewer layers than the teacher model, the student is
176
+ initialised by copying maximally spaced layers from the teacher, as per the [DistilBart](https://arxiv.org/abs/2010.13002)
177
+ recommendations.
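As a rough illustration of what "maximally spaced" means (this is a sketch of the selection rule, not necessarily the exact code in `create_student_model.py`), the copied teacher layer indices can be obtained with a linear spacing over the teacher's depth:

```python
# Sketch: pick `student_layers` indices spread evenly across the teacher's layers.
# For 32 teacher decoder layers and 2 student layers this selects indices 0 and 31,
# i.e. teacher layers 1 and 32.
import numpy as np

def maximally_spaced_indices(teacher_layers: int, student_layers: int):
    return np.linspace(0, teacher_layers - 1, student_layers, dtype=int).tolist()

print(maximally_spaced_indices(32, 2))  # [0, 31]
print(maximally_spaced_indices(32, 4))  # [0, 10, 20, 31]
```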
178
+
179
+ First, we need to create a model repository on the Hugging Face Hub. This repository will contain all the required files
180
+ to reproduce the training run, alongside model weights, training logs and a README.md card. You can either create a model
181
+ repository directly on the Hugging Face Hub using the link https://huggingface.co/new, or via the CLI, as we'll show here.
182
+
183
+ Let's pick a name for our distilled model: `distil-whisper-large-v3-hi`. We can run the following command to create a repository under this name:
184
+
185
+ ```bash
186
+ huggingface-cli repo create distil-whisper-large-v3-hi
187
+ ```
188
+
189
+ We can now see the model on the Hub, e.g. under https://huggingface.co/sanchit-gandhi/distil-whisper-large-v3-hi
190
+
191
+ Let's clone the repository so that we can place our training script and model weights inside:
192
+
193
+ ```bash
194
+ git lfs install
195
+ git clone https://huggingface.co/sanchit-gandhi/distil-whisper-large-v3-hi
196
+ ```
197
+
198
+ Be sure to change the repo address to `https://huggingface.co/<your-user-name>/<your-repo-name>`
199
+
200
+ We can now copy the relevant training scripts to the repository:
201
+ ```bash
202
+ cd distil-whisper-large-v3-hi
203
+
204
+ cp ../distil-whisper/training/create_student_model.py .
205
+ cp ../distil-whisper/training/run_distillation.py .
206
+ ```
207
+
208
+ The following command demonstrates how to initialise a student model from the Whisper [large-v3](https://huggingface.co/openai/whisper-large-v3)
209
+ checkpoint, with all 32 encoder layers and 2 decoder layers. The 2 student decoder layers are copied from teacher layers
210
+ 1 and 32 respectively, as the maximally spaced layers:
211
+
212
+ ```bash
213
+ #!/usr/bin/env bash
214
+
215
+ python create_student_model.py \
216
+ --teacher_checkpoint "openai/whisper-large-v3" \
217
+ --encoder_layers 32 \
218
+ --decoder_layers 2 \
219
+ --save_dir "./distil-large-v3-init"
220
+ ```
221
+
222
+ The initialised model will be saved to the sub-directory `distil-large-v3-init` in our model repository.
223
+
224
+ ## 3. Training
225
+
226
+ The script [`run_distillation.py`](run_distillation.py) is an end-to-end script for loading multiple
227
+ datasets, a student model, a teacher model, and performing teacher-student distillation. It uses the loss formulation
228
+ from the [Distil-Whisper paper](https://arxiv.org/abs/2311.00430), which is a weighted sum of the cross-entropy and
229
+ KL-divergence loss terms.
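Schematically, the objective looks like the sketch below (the coefficient names, default values and temperature are illustrative placeholders rather than the exact arguments of `run_distillation.py`):

```python
# Schematic distillation loss: cross-entropy on the pseudo-labels plus a temperature-scaled
# KL-divergence between the teacher and student token distributions.
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, labels,
                      ce_weight=1.0, kl_weight=1.0, temperature=2.0):
    ce = F.cross_entropy(
        student_logits.view(-1, student_logits.size(-1)), labels.view(-1), ignore_index=-100
    )
    kl = F.kl_div(
        F.log_softmax(student_logits / temperature, dim=-1),
        F.softmax(teacher_logits / temperature, dim=-1),
        reduction="batchmean",
    ) * temperature**2
    return ce_weight * ce + kl_weight * kl
```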
230
+
231
+ The following command takes the Common Voice dataset that was pseudo-labelled in the first stage and trains the
232
+ 2-layer decoder model initialised in the previous step. We pass the local path to the pseudo-labelled Common Voice dataset
233
+ (`../common_voice_16_1_hi_pseudo_labelled`), which you can change to the path where your local pseudo-labelled dataset is
234
+ saved.
235
+
236
+ In this example, we will combine the train and validation splits to give our training set, and evaluate on the test split
237
+ only. This is purely to demonstrate how to combine multiple pseudo-labelled datasets for training, rather than recommended
238
+ advice for defining train/validation splits. We advise that you train on the train splits of your dataset, evaluate and
239
+ tune hyper-parameters on the validation split, and only test the final checkpoint on the test split. Note how multiple
240
+ training datasets and splits can be loaded by separating the dataset arguments by `+` symbols. Thus, the script generalises
241
+ to any number of training datasets.
242
+
243
+ ```bash
244
+ #!/usr/bin/env bash
245
+
246
+ accelerate launch run_distillation.py \
247
+ --model_name_or_path "./distil-large-v3-init" \
248
+ --teacher_model_name_or_path "openai/whisper-large-v3" \
249
+ --train_dataset_name "../common_voice_16_1_hi_pseudo_labelled+../common_voice_16_1_hi_pseudo_labelled" \
250
+ --train_split_name "train+validation" \
251
+ --text_column_name "sentence+sentence" \
252
+ --train_dataset_samples "7+4" \
253
+ --eval_dataset_name "../common_voice_16_1_hi_pseudo_labelled" \
254
+ --eval_split_name "test" \
255
+ --eval_text_column_name "sentence" \
256
+ --eval_steps 1000 \
257
+ --save_steps 1000 \
258
+ --warmup_steps 50 \
259
+ --learning_rate 0.0001 \
260
+ --lr_scheduler_type "constant_with_warmup" \
261
+ --timestamp_probability 0.2 \
262
+ --condition_on_prev_probability 0.2 \
263
+ --language "hi" \
264
+ --task "transcribe" \
265
+ --logging_steps 25 \
266
+ --save_total_limit 1 \
267
+ --max_steps 5000 \
268
+ --wer_threshold 20 \
269
+ --per_device_train_batch_size 32 \
270
+ --per_device_eval_batch_size 32 \
271
+ --dataloader_num_workers 8 \
272
+ --preprocessing_num_workers 8 \
273
+ --ddp_timeout 7200 \
274
+ --dtype "bfloat16" \
275
+ --attn_implementation "sdpa" \
276
+ --output_dir "./" \
277
+ --do_train \
278
+ --do_eval \
279
+ --gradient_checkpointing \
280
+ --overwrite_output_dir \
281
+ --predict_with_generate \
282
+ --freeze_encoder \
283
+ --freeze_embed_positions \
284
+ --streaming False \
285
+ --push_to_hub
286
+
287
+ ```
288
+
289
+ The above training script will take approximately 3 hours to complete on an 80 GB A100 GPU and yield a final WER of 76%.
290
+ While the generations are starting to take form, there is still a 59% WER gap to the teacher model. This is hardly
291
+ surprising given we only have 15 hours of un-filtered data, and closer to just 1.5 hours with data filtering.
292
+ As mentioned above, using upwards of 1000 hours of data and training for 10k steps will likely yield
293
+ more competitive performance. For the [Distil-Whisper paper](https://arxiv.org/abs/2311.00430), we trained on 21k hours
294
+ of audio data for 80k steps. We found that upwards of 13k hours of audio data was required to reach convergence on English
295
+ ASR (see Section 9.2 of the [paper](https://arxiv.org/abs/2311.00430)), so the more data you have, the better!
296
+
297
+ Scaling to multiple GPUs using [distributed data parallelism (DDP)](https://pytorch.org/tutorials/beginner/ddp_series_theory.html)
298
+ is trivial: simply run `accelerate config` and select the multi-GPU option, specifying the IDs of the GPUs you wish to use. The
299
+ above script can then be run using DDP with no code changes.
300
+
301
+ Training logs will be reported to TensorBoard and WandB, provided the relevant packages are available. An example of a
302
+ saved checkpoint pushed to the Hugging Face Hub can be found here: [sanchit-gandhi/distil-whisper-large-v3-hi](https://huggingface.co/sanchit-gandhi/distil-whisper-large-v3-hi).
303
+
304
+ There are a few noteworthy data arguments:
305
+ 1. `train_dataset_samples`: defines the number of training samples in each dataset. Used to calculate the sampling probabilities in the dataloader. A good starting point is setting the samples to the number of hours of audio data in each split. A more refined strategy is setting it to the number of training samples in each split; however, this might require downloading the dataset offline to compute these statistics. A short sketch of how the sampling probabilities are derived from these values is given after this list.
306
+ 2. `wer_threshold`: sets the WER threshold between the normalised pseudo-labels and normalised ground truth labels. Any samples with WER > `wer_threshold` are discarded from the training data. This is beneficial to avoid training the student model on pseudo-labels where Whisper hallucinated or got the predictions grossly wrong. In our English distillation experiments, we found a WER threshold of 10% provides the optimal trade-off between ensuring high-quality transcriptions, and not filtering unnecessary amounts of training data. For multilingual distillation, the threshold should be set in accordance with the WER achieved by the pre-trained model on the test set.
307
+ 3. `streaming`: whether or not to use Datasets' streaming mode. Recommended for large datasets, where the audio data can be streamed from the Hugging Face Hub with no disk space requirements.
308
+ 4. `timestamp_probability`: the per-sample probability for retaining timestamp tokens in the labels (should they contain them). Retaining some portion of timestamp tokens in the training data is required to ensure the distilled model can predict timestamps at inference time. In our experiments, we found that training on timestamps with high-probability hurts the distilled model's transcription performance. Thus, we recommend setting this to a value below 0.5. Typically, a value of 0.2 works well, giving good transcription and timestamp performance.
309
+ 5. `condition_on_prev_probability`: the per-sample probability for conditioning on previous labels. Conditioning on previous tokens is required to ensure the distilled model can be used with the "sequential" long-form transcription algorithm at inference time. We did not experiment with this parameter, but found values around 0.2 to provide adequate performance. OpenAI pre-trained Whisper with a 50% probability of conditioning on previous tokens. Thus, you might wish to try higher values.
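As referenced in point 1 above, here is a short sketch of how the per-dataset sampling probabilities follow from `--train_dataset_samples` (the `7+4` values mirror the example command above; the computation shown is illustrative):

```python
# Illustrative: convert per-dataset sample counts into dataloader sampling probabilities.
train_dataset_samples = [7, 4]  # from --train_dataset_samples "7+4"
total = sum(train_dataset_samples)
sampling_probs = [n / total for n in train_dataset_samples]
print(sampling_probs)  # [0.636..., 0.363...]
```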
310
+
311
+ As well as a few noteworthy model arguments that can be configured to give optimal training performance:
312
+ 1. `freeze_encoder`: whether to freeze the entire encoder of the student model during training. Beneficial when the student encoder is copied exactly from the teacher encoder. In this case, the encoder hidden-states from the teacher model are re-used for the student model. Stopping the gradient computation through the encoder and sharing the encoder hidden-states provides a significant memory saving, and can enable up to 2x batch sizes.
313
+ 2. `freeze_embed_positions`: whether to freeze the student model's decoder positional embeddings. Using the same embed positions as the teacher model, which is designed to handle context lengths up to 448 tokens, helps the student model retain its input id representation up to the full max input length.
314
+ 3. `dtype`: data type (dtype) in which the model computation should be performed. Note that this only controls the dtype of the computations (forward and backward pass), and not the dtype of the parameters or optimiser states.
315
+
316
+ And finally, a few noteworthy training arguments:
317
+ 1. `max_steps`: defines the total number of optimisation steps (forward + backward pass) during training. To reach convergence, you should use a dataset of at least 1k hours and train for a minimum of 50k steps.
318
+ 2. `lr_scheduler_type`: defines the learning rate schedule, one of `constant_with_warmup` or `linear`. When experimenting with a training set-up or training for very few steps (< 5k), using `constant_with_warmup` is typically beneficial, since the learning rate remains high over the short training run. When performing long training runs (> 5k), using a `linear` schedule generally results in superior downstream performance of the distilled model.
319
+
320
+ TODO:
321
+ - [ ] Template for model cards
322
+
323
+ ## 4. Evaluation
324
+
325
+ There are four types of evaluation performed in Distil-Whisper:
326
+ 1. Short form: evaluation on audio samples less than 30s in duration. Examples include typical ASR test sets, such as the LibriSpeech validation set.
327
+ 2. Sequential long form: evaluation on audio samples longer than 30s in duration using the original "sequential" long-form algorithm. Examples include entire TED talks or earnings calls.
328
+ 3. Chunked long form: evaluation on audio samples longer than 30s in duration using the Transformers "chunked" long-form algorithm.
329
+ 4. Speculative decoding: evaluation on audio samples less than 30s in duration, where a faster, distilled model is used as the assistant to a slower, teacher model.
330
+
331
+ All four forms of evaluation are performed using the script [`run_eval.py`](run_eval.py). Unlike the pseudo-labelling
332
+ and training scripts, the evaluation script assumes that only one GPU accelerator is used. We can copy the corresponding
333
+ evaluation script to the model repository using the following command:
334
+
335
+ ```bash
336
+ cp ../distil-whisper/training/run_eval.py .
337
+ ```
338
+
339
+ Models are assessed jointly using:
340
+ 1. The *word-error rate (WER)* metric: measures the number of substitution, deletion and insertion errors relative to the total number of words. A lower WER indicates a more accurate model.
341
+ 2. The *inverse real-time factor (RTFx)* metric: measures the ratio of `audio input time : model compute time`. A higher RTFx indicates a faster model.
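As a minimal illustration of both metrics (assuming the `jiwer` package is installed; the timing here is schematic rather than a real benchmark):

```python
# Sketch: WER via jiwer, and RTFx as audio duration divided by compute time.
import time
import jiwer

reference = "the cat sat on the mat"
start = time.perf_counter()
prediction = "the cat sat on a mat"  # stand-in for the model's transcription
compute_time = time.perf_counter() - start

wer = jiwer.wer(reference, prediction)            # lower is better (here 1/6 ≈ 16.7%)
audio_duration = 3.2                              # hypothetical input length in seconds
rtfx = audio_duration / max(compute_time, 1e-9)   # higher is better
print(f"WER: {100 * wer:.1f}%  RTFx: {rtfx:.1f}")
```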
342
+
343
+ In all cases, it is particularly important to evaluate the final model on data that is *out-of-distribution (OOD)* with
344
+ the training data. Evaluating on OOD data provides insight as to how well the distilled model is likely to generalise to
345
+ different audio distributions at inference time. In our example, the Common Voice test set is *in-distribution (ID)*
346
+ with our training data, since it is taken from the same distribution as the Common Voice training set. Whereas the FLEURS
347
+ test set is OOD, since it is not used as part of the training set.
348
+
349
+ ### Short Form
350
+
351
+ The script [`run_eval.py`](run_eval.py) can be used to evaluate a trained student model over multiple short-form
352
+ validation sets. The following example demonstrates how to evaluate the student model trained in the previous step on
353
+ the Common Voice `test` set (ID) and also the FLEURS `test` set (OOD). Again, it leverages streaming mode to bypass
354
+ the need to download the data offline:
355
+
356
+ ```bash
357
+ #!/usr/bin/env bash
358
+
359
+ python run_eval.py \
360
+ --model_name_or_path "./" \
361
+ --dataset_name "../common_voice_16_1_hi_pseudo_labelled+google/fleurs" \
362
+ --dataset_config_name "default+hi_in" \
363
+ --dataset_split_name "test+test" \
364
+ --text_column_name "sentence+transcription" \
365
+ --batch_size 16 \
366
+ --dtype "bfloat16" \
367
+ --generation_max_length 256 \
368
+ --language "hi" \
369
+ --attn_implementation "sdpa" \
370
+ --streaming
371
+
372
+ ```
373
+
374
+ The student model achieves an average WER of TODO% with an RTFx of TODO for a batch size of 16. We can easily adapt the above
375
+ script to evaluate the teacher model, simply by switching the `model_name_or_path` to `openai/whisper-large-v3`, which
376
+ achieves an average WER of TODO% with an RTFx of TODO. Therefore, for a batch size of 16, the student model is a factor of TODO
377
+ times faster than the teacher. The WER gap can be closed by training on more data (at least 1k hours) for more training
378
+ steps (at least 50k).
379
+
380
+ ### Sequential Long Form
381
+
382
+ The original Whisper paper presents a long-form transcription algorithm that sequentially transcribes 30-second segments
383
+ of audio and shifts the sliding window according to the timestamps predicted by the model. This style of sequential
384
+ inference is performed directly using the [`.generate`](https://huggingface.co/docs/transformers/model_doc/whisper#transformers.WhisperForConditionalGeneration.generate)
385
+ method in Transformers.
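For reference, a rough sketch of sequential long-form transcription through `.generate` is shown below; the pre-processing keyword arguments follow recent Transformers releases and may differ slightly between versions, and the long audio sample is pulled from a public demo dataset:

```python
# Rough sketch of sequential long-form decoding with .generate (arguments indicative;
# check the Transformers documentation for the exact long-form API in your version).
from datasets import load_dataset
from transformers import WhisperForConditionalGeneration, WhisperProcessor

processor = WhisperProcessor.from_pretrained("distil-whisper/distil-large-v3")
model = WhisperForConditionalGeneration.from_pretrained("distil-whisper/distil-large-v3").to("cuda")

# A >30s audio sample from a public long-form demo dataset
sample = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")[0]["audio"]
inputs = processor(
    sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt",
    truncation=False, padding="longest", return_attention_mask=True,
).to("cuda")

generated_ids = model.generate(**inputs, return_timestamps=True)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])
```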
386
+
387
+ The script [`run_eval.py`](run_eval.py) can be used to evaluate the trained student model on an arbitrary number of
388
+ long-form evaluation sets using the sequential algorithm. Since we don't have a long-form validation set for Hindi to hand,
389
+ in this example we'll evaluate the official Distil-Whisper model [`distil-large-v3`](https://huggingface.co/distil-whisper/distil-large-v3)
390
+ on the TED-LIUM validation set:
391
+
392
+ ```bash
393
+ #!/usr/bin/env bash
394
+
395
+ accelerate launch run_eval.py \
396
+ --model_name_or_path "distil-whisper/distil-large-v3" \
397
+ --dataset_name "distil-whisper/tedlium-long-form" \
398
+ --dataset_config_name "default" \
399
+ --dataset_split_name "validation" \
400
+ --text_column_name "text" \
401
+ --batch_size 16 \
402
+ --dtype "bfloat16" \
403
+ --generation_max_length 256 \
404
+ --language "en" \
405
+ --attn_implementation "sdpa" \
406
+ --streaming
407
+
408
+ ```
409
+
410
+ ### Chunked Long Form
411
+
412
+ Chunked long form evaluation runs on the premise that a single long audio file can be *chunked* into smaller segments and
413
+ inferred in parallel. The resulting transcriptions are then joined at the boundaries to give the final text prediction.
414
+ A small overlap (or *stride*) is used between adjacent segments to ensure a continuous transcription across chunks.
415
+
416
+ This style of chunked inference is performed using the [`pipeline`](https://huggingface.co/docs/transformers/main_classes/pipelines)
417
+ class, which provides a wrapper around the [`.generate`](https://huggingface.co/docs/transformers/model_doc/whisper#transformers.WhisperForConditionalGeneration.generate)
418
+ function for long-form inference.
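For reference, a minimal sketch of chunked inference through the `pipeline` class directly (the `audio.mp3` path is a placeholder for your own audio file):

```python
# Sketch: chunked long-form inference with the ASR pipeline; chunk_length_s controls the
# chunk size and batch_size how many chunks are transcribed in parallel.
import torch
from transformers import pipeline

asr = pipeline(
    "automatic-speech-recognition",
    model="distil-whisper/distil-large-v3",
    torch_dtype=torch.float16,
    device="cuda:0",
)
result = asr("audio.mp3", chunk_length_s=25, batch_size=16, return_timestamps=True)
print(result["text"])
```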
419
+
420
+ The script [`run_eval.py`](run_eval.py) can be used to evaluate the trained student model on an arbitrary number of
421
+ long-form evaluation sets using the pipeline class. In this example we'll evaluate the Whisper large-v3 checkpoint on the
422
+ TED-LIUM validation set:
423
+
424
+ ```bash
425
+ #!/usr/bin/env bash
426
+
427
+ python run_eval.py \
428
+ --model_name_or_path "openai/whisper-large-v3" \
429
+ --dataset_name "distil-whisper/tedlium-long-form" \
430
+ --dataset_config_name "default" \
431
+ --dataset_split_name "validation" \
432
+ --text_column_name "text" \
433
+ --use_pipeline \
434
+ --chunk_length_s 25.0 \
435
+ --language "en" \
436
+ --return_timestamps \
437
+ --dtype "bfloat16" \
438
+ --streaming
439
+
440
+ ```
441
+
442
+ The argument `chunk_length_s` controls the length of the chunked audio samples. It should be set to match the typical
443
+ length of audio the student model was trained on. If unsure about what value of `chunk_length_s` is optimal for your case,
444
+ it is recommended to run a *sweep* over all possible values. A template script for running a [WandB sweep](https://docs.wandb.ai/guides/sweeps)
445
+ can be found under [`run_chunk_length_s_sweep.yaml`](flax/long_form_transcription_scripts/run_chunk_length_s_sweep.yaml).
446
+
447
+ ### Speculative Decoding
448
+
449
+ Speculative decoding, or assisted generation, relies on the premise that a faster, assistant model can be used to speed up
450
+ the generation of a slower, main model. Speculative decoding mathematically ensures that exactly the same outputs as
451
+ Whisper are obtained, while being ~2 times faster. This makes it the perfect drop-in replacement for existing Whisper
452
+ pipelines, since exactly the same outputs are guaranteed.
453
+
454
+ Distil-Whisper checkpoints can be designed to be efficient assistant models to Whisper for speculative decoding. More precisely,
455
+ by freezing the encoder during training, the distilled model can share the same encoder weights as Whisper during inference, since
456
+ the encoder weights are un-changed. In doing so, only the distilled 2-layer decoder has to be loaded in addition to the
457
+ original Whisper model, which is approximately an 8% increase to the total parameter count, with up to 2x faster inference
458
+ for low batch sizes. For more details on speculative decoding, the reader is advised to refer to the following blog post:
459
+ [Speculative Decoding for 2x Faster Whisper Inference](https://huggingface.co/blog/whisper-speculative-decoding).
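As a minimal sketch of what this looks like with the Transformers `generate` API (the `./` path stands in for your distilled checkpoint, and the audio comes from a small public test set):

```python
# Sketch: the distilled model acts as the assistant to Whisper large-v3 during generation,
# producing identical outputs to large-v3 alone but (typically) faster.
from datasets import load_dataset
from transformers import WhisperForConditionalGeneration, WhisperProcessor

processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3")
teacher = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v3").to("cuda")
assistant = WhisperForConditionalGeneration.from_pretrained("./").to("cuda")  # distilled checkpoint

sample = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")[0]["audio"]
inputs = processor(sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt").to("cuda")

generated_ids = teacher.generate(inputs.input_features, assistant_model=assistant)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])
```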
460
+
461
+ In the example below, we use our distilled model as an assistant to the large-v3 teacher model during inference:
462
+
463
+ ```bash
464
+ #!/usr/bin/env bash
465
+
466
+ python run_eval.py \
467
+ --model_name_or_path "openai/whisper-large-v3" \
468
+ --assistant_model_name_or_path "./" \
469
+ --dataset_name "../common_voice_16_1_hi_pseudo_labelled+google/fleurs" \
470
+ --dataset_config_name "default+hi_in" \
471
+ --dataset_split_name "test+test" \
472
+ --text_column_name "sentence+transcription" \
473
+ --batch_size 16 \
474
+ --dtype "bfloat16" \
475
+ --generation_max_length 256 \
476
+ --language "hi" \
477
+ --attn_implementation "sdpa" \
478
+ --streaming
479
+
480
+ ```
481
+
482
+ We see that we achieve a WER of TODO%, the same as what we obtained with the large-v3 model, but with an RTFx of TODO,
483
+ a factor of TODO faster than using the large-v3 model alone. The RTFx value can be improved by training the student on
484
+ more data and for more training steps, since this will improve the number of predicted tokens that match the teacher
485
+ predictions.
486
+
487
+ ## Overview of Training Methods
488
+
489
+ ### 1. Fine-Tuning
490
+
491
+ For fine-tuning, we take the original Whisper checkpoint and train it on one or more datasets using the standard
492
+ cross-entropy loss. As such, there is no involvement from the teacher checkpoint during training, and so the fine-tuned
493
+ model is permitted to *overfit* to the distribution of the training data we provide. This makes it appealing for "low-resource"
494
+ languages where the original Whisper model performs poorly, since we can boost the performance of the model on a single
495
+ language by *overfitting* to that distribution of data. Note that this means the fine-tuned model is prone to losing
496
+ its robustness to different audio distributions, which is the trade-off with improving performance on a specified dataset.
497
+
498
+ As a rule of thumb, fine-tuning is appropriate for languages where the original Whisper model performs > 20% WER, and we
499
+ have a relatively small quantity of training data available (< 1000 hours). With fine-tuning, we require as little as **10 hours**
500
+ of training data to significantly boost the performance of the Whisper model. For an in-depth guide to fine-tuning Whisper,
501
+ the reader is advised to refer to the blog post: [Fine-Tune Whisper For Multilingual ASR with 🤗 Transformers](https://huggingface.co/blog/fine-tune-whisper).
502
+
503
+ ### 2. Shrink and Fine-Tune
504
+
505
+ Shrink and fine-tune (SFT) is a knowledge distillation (KD) technique in which we first *shrink* the teacher model to a
506
+ smaller student model by copying maximally spaced layers, and then *fine-tune* the student model on the cross-entropy loss
507
+ as described above. Typically, we retain the full encoder from the Whisper model and only shrink the decoder. Retaining
508
+ the entire encoder helps significantly with maintaining Whisper's robustness to different audio distributions (_c.f._
509
+ Section 9.3 of the [Distil-Whisper paper](https://arxiv.org/abs/2311.00430)).
510
+
511
+ We can either train the student model on a dataset of (audio, text) pairs as above. Or, we can use the pre-trained
512
+ Whisper model to generate *pseudo-labels* for our audio data, and train on the (audio, pseudo-label) pairs.
513
+
514
+ Pseudo-labels can be used when either:
515
+ 1. The original text transcriptions are normalised (lower-cased or no punctuation): the Whisper generated pseudo-labels contain both punctuation and casing, and so can be used as a substitute for the normalised transcriptions
516
+ 2. The pre-trained Whisper model achieves < 20% WER on the language: we then know the majority of the pseudo-labels will be accurate enough for us to train on.
517
+
518
+ They are not recommended when both of the following are true:
519
+ 1. The original text is punctuated and cased
520
+ 2. The pre-trained Whisper model achieves > 20% WER on the language: in this case, we want to overfit to the particular distribution of the language, and so train directly on the original text data.
521
+
522
+ To discard inaccurate pseudo-labels during training, we employ a simple WER heuristic to filter our pseudo-labelled
523
+ training data. We first normalise the original text and the pseudo-labelled text using the Whisper normaliser. If the
524
+ WER between the normalised texts exceeds a 10% threshold, we discard the training sample; otherwise, we retain it for training.
525
+ Section 9.1 of the Distil-Whisper [paper](https://arxiv.org/abs/2311.00430) demonstrates the importance of using this
526
+ threshold for training.
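A minimal sketch of this filtering heuristic (assuming `jiwer` is installed; the training scripts may implement the details differently):

```python
# Sketch: normalise both transcriptions, compute their WER, and keep the sample only if
# the WER is at or below the threshold. Uses the basic multilingual Whisper normaliser.
import jiwer
from transformers.models.whisper.english_normalizer import BasicTextNormalizer

normalizer = BasicTextNormalizer()
WER_THRESHOLD = 0.10  # 10%

def keep_sample(ground_truth: str, pseudo_label: str) -> bool:
    reference, hypothesis = normalizer(ground_truth), normalizer(pseudo_label)
    return jiwer.wer(reference, hypothesis) <= WER_THRESHOLD

print(keep_sample("The cat sat on the mat.", "the cat sat on the mat"))  # True
print(keep_sample("The cat sat on the mat.", "a dog stood near a rug"))  # False
```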
527
+
528
+ ### 3. KL Divergence
529
+
530
+ In the KL Divergence setting, the student model is initialised by shrinking the teacher as before, and then trained to
531
+ match the predictions of the teacher during training.
532
+
533
+ ### Summary of Methods
534
+
535
+ The following table summarises the two training paradigms: fine-tuning and knowledge distillation (KD). It suggests
536
+ minimum values for the pre-trained WER / training data to achieve reasonable performance:
537
+
538
+ | Method | Pre-Trained WER / % | Training Data / h |
539
+ |-------------|---------------------|-------------------|
540
+ | Fine-tuning | > 20 | < 1000 |
541
+ | KD | < 20 | > 1000 |
542
+
543
+ ## Acknowledgements
544
+
545
+ * OpenAI for the Whisper [model](https://huggingface.co/openai/whisper-large-v3) and [original codebase](https://github.com/openai/whisper)
546
+ * Hugging Face 🤗 [Transformers](https://github.com/huggingface/transformers) for the Whisper model implementation
547
+ * Google's [TPU Research Cloud (TRC)](https://sites.research.google/trc/about/) program for Cloud TPU v4s used to train the official Distil-Whisper models
548
+ * The Hugging Face 🤗 cluster for enabling experimentation with the PyTorch scripts
549
+
550
+ ## Citation
551
+
552
+ If you use this code-base, please consider citing the Distil-Whisper paper:
553
+
554
+ ```
555
+ @misc{gandhi2023distilwhisper,
556
+ title={Distil-Whisper: Robust Knowledge Distillation via Large-Scale Pseudo Labelling},
557
+ author={Sanchit Gandhi and Patrick von Platen and Alexander M. Rush},
558
+ year={2023},
559
+ eprint={2311.00430},
560
+ archivePrefix={arXiv},
561
+ primaryClass={cs.CL}
562
+ }
563
+ ```
added_tokens.json ADDED
@@ -0,0 +1,1609 @@
1
+ {
2
+ "<|0.00|>": 50364,
3
+ "<|0.02|>": 50365,
4
+ "<|0.04|>": 50366,
5
+ "<|0.06|>": 50367,
6
+ "<|0.08|>": 50368,
7
+ "<|0.10|>": 50369,
8
+ "<|0.12|>": 50370,
9
+ "<|0.14|>": 50371,
10
+ "<|0.16|>": 50372,
11
+ "<|0.18|>": 50373,
12
+ "<|0.20|>": 50374,
13
+ "<|0.22|>": 50375,
14
+ "<|0.24|>": 50376,
15
+ "<|0.26|>": 50377,
16
+ "<|0.28|>": 50378,
17
+ "<|0.30|>": 50379,
18
+ "<|0.32|>": 50380,
19
+ "<|0.34|>": 50381,
20
+ "<|0.36|>": 50382,
21
+ "<|0.38|>": 50383,
22
+ "<|0.40|>": 50384,
23
+ "<|0.42|>": 50385,
24
+ "<|0.44|>": 50386,
25
+ "<|0.46|>": 50387,
26
+ "<|0.48|>": 50388,
27
+ "<|0.50|>": 50389,
28
+ "<|0.52|>": 50390,
29
+ "<|0.54|>": 50391,
30
+ "<|0.56|>": 50392,
31
+ "<|0.58|>": 50393,
32
+ "<|0.60|>": 50394,
33
+ "<|0.62|>": 50395,
34
+ "<|0.64|>": 50396,
35
+ "<|0.66|>": 50397,
36
+ "<|0.68|>": 50398,
37
+ "<|0.70|>": 50399,
38
+ "<|0.72|>": 50400,
39
+ "<|0.74|>": 50401,
40
+ "<|0.76|>": 50402,
41
+ "<|0.78|>": 50403,
42
+ "<|0.80|>": 50404,
43
+ "<|0.82|>": 50405,
44
+ "<|0.84|>": 50406,
45
+ "<|0.86|>": 50407,
46
+ "<|0.88|>": 50408,
47
+ "<|0.90|>": 50409,
48
+ "<|0.92|>": 50410,
49
+ "<|0.94|>": 50411,
50
+ "<|0.96|>": 50412,
51
+ "<|0.98|>": 50413,
52
+ "<|1.00|>": 50414,
53
+ "<|1.02|>": 50415,
54
+ "<|1.04|>": 50416,
55
+ "<|1.06|>": 50417,
56
+ "<|1.08|>": 50418,
57
+ "<|1.10|>": 50419,
58
+ "<|1.12|>": 50420,
59
+ "<|1.14|>": 50421,
60
+ "<|1.16|>": 50422,
61
+ "<|1.18|>": 50423,
62
+ "<|1.20|>": 50424,
63
+ "<|1.22|>": 50425,
64
+ "<|1.24|>": 50426,
65
+ "<|1.26|>": 50427,
66
+ "<|1.28|>": 50428,
67
+ "<|1.30|>": 50429,
68
+ "<|1.32|>": 50430,
69
+ "<|1.34|>": 50431,
70
+ "<|1.36|>": 50432,
71
+ "<|1.38|>": 50433,
72
+ "<|1.40|>": 50434,
73
+ "<|1.42|>": 50435,
74
+ "<|1.44|>": 50436,
75
+ "<|1.46|>": 50437,
76
+ "<|1.48|>": 50438,
77
+ "<|1.50|>": 50439,
78
+ "<|1.52|>": 50440,
79
+ "<|1.54|>": 50441,
80
+ "<|1.56|>": 50442,
81
+ "<|1.58|>": 50443,
82
+ "<|1.60|>": 50444,
83
+ "<|1.62|>": 50445,
84
+ "<|1.64|>": 50446,
85
+ "<|1.66|>": 50447,
86
+ "<|1.68|>": 50448,
87
+ "<|1.70|>": 50449,
88
+ "<|1.72|>": 50450,
89
+ "<|1.74|>": 50451,
90
+ "<|1.76|>": 50452,
91
+ "<|1.78|>": 50453,
92
+ "<|1.80|>": 50454,
93
+ "<|1.82|>": 50455,
94
+ "<|1.84|>": 50456,
95
+ "<|1.86|>": 50457,
96
+ "<|1.88|>": 50458,
97
+ "<|1.90|>": 50459,
98
+ "<|1.92|>": 50460,
99
+ "<|1.94|>": 50461,
100
+ "<|1.96|>": 50462,
101
+ "<|1.98|>": 50463,
102
+ "<|10.00|>": 50864,
103
+ "<|10.02|>": 50865,
104
+ "<|10.04|>": 50866,
105
+ "<|10.06|>": 50867,
106
+ "<|10.08|>": 50868,
107
+ "<|10.10|>": 50869,
108
+ "<|10.12|>": 50870,
109
+ "<|10.14|>": 50871,
110
+ "<|10.16|>": 50872,
111
+ "<|10.18|>": 50873,
112
+ "<|10.20|>": 50874,
113
+ "<|10.22|>": 50875,
114
+ "<|10.24|>": 50876,
115
+ "<|10.26|>": 50877,
116
+ "<|10.28|>": 50878,
117
+ "<|10.30|>": 50879,
118
+ "<|10.32|>": 50880,
119
+ "<|10.34|>": 50881,
120
+ "<|10.36|>": 50882,
121
+ "<|10.38|>": 50883,
122
+ "<|10.40|>": 50884,
123
+ "<|10.42|>": 50885,
124
+ "<|10.44|>": 50886,
125
+ "<|10.46|>": 50887,
126
+ "<|10.48|>": 50888,
127
+ "<|10.50|>": 50889,
128
+ "<|10.52|>": 50890,
129
+ "<|10.54|>": 50891,
130
+ "<|10.56|>": 50892,
131
+ "<|10.58|>": 50893,
132
+ "<|10.60|>": 50894,
133
+ "<|10.62|>": 50895,
134
+ "<|10.64|>": 50896,
135
+ "<|10.66|>": 50897,
136
+ "<|10.68|>": 50898,
137
+ "<|10.70|>": 50899,
138
+ "<|10.72|>": 50900,
139
+ "<|10.74|>": 50901,
140
+ "<|10.76|>": 50902,
141
+ "<|10.78|>": 50903,
142
+ "<|10.80|>": 50904,
143
+ "<|10.82|>": 50905,
144
+ "<|10.84|>": 50906,
145
+ "<|10.86|>": 50907,
146
+ "<|10.88|>": 50908,
147
+ "<|10.90|>": 50909,
148
+ "<|10.92|>": 50910,
149
+ "<|10.94|>": 50911,
150
+ "<|10.96|>": 50912,
151
+ "<|10.98|>": 50913,
152
+ "<|11.00|>": 50914,
153
+ "<|11.02|>": 50915,
154
+ "<|11.04|>": 50916,
155
+ "<|11.06|>": 50917,
156
+ "<|11.08|>": 50918,
157
+ "<|11.10|>": 50919,
158
+ "<|11.12|>": 50920,
159
+ "<|11.14|>": 50921,
160
+ "<|11.16|>": 50922,
161
+ "<|11.18|>": 50923,
162
+ "<|11.20|>": 50924,
163
+ "<|11.22|>": 50925,
164
+ "<|11.24|>": 50926,
165
+ "<|11.26|>": 50927,
166
+ "<|11.28|>": 50928,
167
+ "<|11.30|>": 50929,
168
+ "<|11.32|>": 50930,
169
+ "<|11.34|>": 50931,
170
+ "<|11.36|>": 50932,
171
+ "<|11.38|>": 50933,
172
+ "<|11.40|>": 50934,
173
+ "<|11.42|>": 50935,
174
+ "<|11.44|>": 50936,
175
+ "<|11.46|>": 50937,
176
+ "<|11.48|>": 50938,
177
+ "<|11.50|>": 50939,
178
+ "<|11.52|>": 50940,
179
+ "<|11.54|>": 50941,
180
+ "<|11.56|>": 50942,
181
+ "<|11.58|>": 50943,
182
+ "<|11.60|>": 50944,
183
+ "<|11.62|>": 50945,
184
+ "<|11.64|>": 50946,
185
+ "<|11.66|>": 50947,
186
+ "<|11.68|>": 50948,
187
+ "<|11.70|>": 50949,
188
+ "<|11.72|>": 50950,
189
+ "<|11.74|>": 50951,
190
+ "<|11.76|>": 50952,
191
+ "<|11.78|>": 50953,
192
+ "<|11.80|>": 50954,
193
+ "<|11.82|>": 50955,
194
+ "<|11.84|>": 50956,
195
+ "<|11.86|>": 50957,
196
+ "<|11.88|>": 50958,
197
+ "<|11.90|>": 50959,
198
+ "<|11.92|>": 50960,
199
+ "<|11.94|>": 50961,
200
+ "<|11.96|>": 50962,
201
+ "<|11.98|>": 50963,
202
+ "<|12.00|>": 50964,
203
+ "<|12.02|>": 50965,
204
+ "<|12.04|>": 50966,
205
+ "<|12.06|>": 50967,
206
+ "<|12.08|>": 50968,
207
+ "<|12.10|>": 50969,
208
+ "<|12.12|>": 50970,
209
+ "<|12.14|>": 50971,
210
+ "<|12.16|>": 50972,
211
+ "<|12.18|>": 50973,
212
+ "<|12.20|>": 50974,
213
+ "<|12.22|>": 50975,
214
+ "<|12.24|>": 50976,
215
+ "<|12.26|>": 50977,
216
+ "<|12.28|>": 50978,
217
+ "<|12.30|>": 50979,
218
+ "<|12.32|>": 50980,
219
+ "<|12.34|>": 50981,
220
+ "<|12.36|>": 50982,
221
+ "<|12.38|>": 50983,
222
+ "<|12.40|>": 50984,
223
+ "<|12.42|>": 50985,
224
+ "<|12.44|>": 50986,
225
+ "<|12.46|>": 50987,
226
+ "<|12.48|>": 50988,
227
+ "<|12.50|>": 50989,
228
+ "<|12.52|>": 50990,
229
+ "<|12.54|>": 50991,
230
+ "<|12.56|>": 50992,
231
+ "<|12.58|>": 50993,
232
+ "<|12.60|>": 50994,
233
+ "<|12.62|>": 50995,
234
+ "<|12.64|>": 50996,
235
+ "<|12.66|>": 50997,
236
+ "<|12.68|>": 50998,
237
+ "<|12.70|>": 50999,
238
+ "<|12.72|>": 51000,
239
+ "<|12.74|>": 51001,
240
+ "<|12.76|>": 51002,
241
+ "<|12.78|>": 51003,
242
+ "<|12.80|>": 51004,
243
+ "<|12.82|>": 51005,
244
+ "<|12.84|>": 51006,
245
+ "<|12.86|>": 51007,
246
+ "<|12.88|>": 51008,
247
+ "<|12.90|>": 51009,
248
+ "<|12.92|>": 51010,
249
+ "<|12.94|>": 51011,
250
+ "<|12.96|>": 51012,
251
+ "<|12.98|>": 51013,
252
+ "<|13.00|>": 51014,
253
+ "<|13.02|>": 51015,
254
+ "<|13.04|>": 51016,
255
+ "<|13.06|>": 51017,
256
+ "<|13.08|>": 51018,
257
+ "<|13.10|>": 51019,
258
+ "<|13.12|>": 51020,
259
+ "<|13.14|>": 51021,
260
+ "<|13.16|>": 51022,
261
+ "<|13.18|>": 51023,
262
+ "<|13.20|>": 51024,
263
+ "<|13.22|>": 51025,
264
+ "<|13.24|>": 51026,
265
+ "<|13.26|>": 51027,
266
+ "<|13.28|>": 51028,
267
+ "<|13.30|>": 51029,
268
+ "<|13.32|>": 51030,
269
+ "<|13.34|>": 51031,
270
+ "<|13.36|>": 51032,
271
+ "<|13.38|>": 51033,
272
+ "<|13.40|>": 51034,
273
+ "<|13.42|>": 51035,
274
+ "<|13.44|>": 51036,
275
+ "<|13.46|>": 51037,
276
+ "<|13.48|>": 51038,
277
+ "<|13.50|>": 51039,
278
+ "<|13.52|>": 51040,
279
+ "<|13.54|>": 51041,
280
+ "<|13.56|>": 51042,
281
+ "<|13.58|>": 51043,
282
+ "<|13.60|>": 51044,
283
+ "<|13.62|>": 51045,
284
+ "<|13.64|>": 51046,
285
+ "<|13.66|>": 51047,
286
+ "<|13.68|>": 51048,
287
+ "<|13.70|>": 51049,
288
+ "<|13.72|>": 51050,
289
+ "<|13.74|>": 51051,
290
+ "<|13.76|>": 51052,
291
+ "<|13.78|>": 51053,
292
+ "<|13.80|>": 51054,
293
+ "<|13.82|>": 51055,
294
+ "<|13.84|>": 51056,
295
+ "<|13.86|>": 51057,
296
+ "<|13.88|>": 51058,
297
+ "<|13.90|>": 51059,
298
+ "<|13.92|>": 51060,
299
+ "<|13.94|>": 51061,
300
+ "<|13.96|>": 51062,
301
+ "<|13.98|>": 51063,
302
+ "<|14.00|>": 51064,
303
+ "<|14.02|>": 51065,
304
+ "<|14.04|>": 51066,
305
+ "<|14.06|>": 51067,
306
+ "<|14.08|>": 51068,
307
+ "<|14.10|>": 51069,
308
+ "<|14.12|>": 51070,
309
+ "<|14.14|>": 51071,
310
+ "<|14.16|>": 51072,
311
+ "<|14.18|>": 51073,
312
+ "<|14.20|>": 51074,
313
+ "<|14.22|>": 51075,
314
+ "<|14.24|>": 51076,
315
+ "<|14.26|>": 51077,
316
+ "<|14.28|>": 51078,
317
+ "<|14.30|>": 51079,
318
+ "<|14.32|>": 51080,
319
+ "<|14.34|>": 51081,
320
+ "<|14.36|>": 51082,
321
+ "<|14.38|>": 51083,
322
+ "<|14.40|>": 51084,
323
+ "<|14.42|>": 51085,
324
+ "<|14.44|>": 51086,
325
+ "<|14.46|>": 51087,
326
+ "<|14.48|>": 51088,
327
+ "<|14.50|>": 51089,
328
+ "<|14.52|>": 51090,
329
+ "<|14.54|>": 51091,
330
+ "<|14.56|>": 51092,
331
+ "<|14.58|>": 51093,
332
+ "<|14.60|>": 51094,
333
+ "<|14.62|>": 51095,
334
+ "<|14.64|>": 51096,
335
+ "<|14.66|>": 51097,
336
+ "<|14.68|>": 51098,
337
+ "<|14.70|>": 51099,
338
+ "<|14.72|>": 51100,
339
+ "<|14.74|>": 51101,
340
+ "<|14.76|>": 51102,
341
+ "<|14.78|>": 51103,
342
+ "<|14.80|>": 51104,
343
+ "<|14.82|>": 51105,
344
+ "<|14.84|>": 51106,
345
+ "<|14.86|>": 51107,
346
+ "<|14.88|>": 51108,
347
+ "<|14.90|>": 51109,
348
+ "<|14.92|>": 51110,
349
+ "<|14.94|>": 51111,
350
+ "<|14.96|>": 51112,
351
+ "<|14.98|>": 51113,
352
+ "<|15.00|>": 51114,
353
+ "<|15.02|>": 51115,
354
+ "<|15.04|>": 51116,
355
+ "<|15.06|>": 51117,
356
+ "<|15.08|>": 51118,
357
+ "<|15.10|>": 51119,
358
+ "<|15.12|>": 51120,
359
+ "<|15.14|>": 51121,
360
+ "<|15.16|>": 51122,
361
+ "<|15.18|>": 51123,
362
+ "<|15.20|>": 51124,
363
+ "<|15.22|>": 51125,
364
+ "<|15.24|>": 51126,
365
+ "<|15.26|>": 51127,
366
+ "<|15.28|>": 51128,
367
+ "<|15.30|>": 51129,
368
+ "<|15.32|>": 51130,
369
+ "<|15.34|>": 51131,
370
+ "<|15.36|>": 51132,
371
+ "<|15.38|>": 51133,
372
+ "<|15.40|>": 51134,
373
+ "<|15.42|>": 51135,
374
+ "<|15.44|>": 51136,
375
+ "<|15.46|>": 51137,
376
+ "<|15.48|>": 51138,
377
+ "<|15.50|>": 51139,
378
+ "<|15.52|>": 51140,
379
+ "<|15.54|>": 51141,
380
+ "<|15.56|>": 51142,
381
+ "<|15.58|>": 51143,
382
+ "<|15.60|>": 51144,
383
+ "<|15.62|>": 51145,
384
+ "<|15.64|>": 51146,
385
+ "<|15.66|>": 51147,
386
+ "<|15.68|>": 51148,
387
+ "<|15.70|>": 51149,
388
+ "<|15.72|>": 51150,
389
+ "<|15.74|>": 51151,
390
+ "<|15.76|>": 51152,
391
+ "<|15.78|>": 51153,
392
+ "<|15.80|>": 51154,
393
+ "<|15.82|>": 51155,
394
+ "<|15.84|>": 51156,
395
+ "<|15.86|>": 51157,
396
+ "<|15.88|>": 51158,
397
+ "<|15.90|>": 51159,
398
+ "<|15.92|>": 51160,
399
+ "<|15.94|>": 51161,
400
+ "<|15.96|>": 51162,
401
+ "<|15.98|>": 51163,
402
+ "<|16.00|>": 51164,
403
+ "<|16.02|>": 51165,
404
+ "<|16.04|>": 51166,
405
+ "<|16.06|>": 51167,
406
+ "<|16.08|>": 51168,
407
+ "<|16.10|>": 51169,
408
+ "<|16.12|>": 51170,
409
+ "<|16.14|>": 51171,
410
+ "<|16.16|>": 51172,
411
+ "<|16.18|>": 51173,
412
+ "<|16.20|>": 51174,
413
+ "<|16.22|>": 51175,
414
+ "<|16.24|>": 51176,
415
+ "<|16.26|>": 51177,
416
+ "<|16.28|>": 51178,
417
+ "<|16.30|>": 51179,
418
+ "<|16.32|>": 51180,
419
+ "<|16.34|>": 51181,
420
+ "<|16.36|>": 51182,
421
+ "<|16.38|>": 51183,
422
+ "<|16.40|>": 51184,
423
+ "<|16.42|>": 51185,
424
+ "<|16.44|>": 51186,
425
+ "<|16.46|>": 51187,
426
+ "<|16.48|>": 51188,
427
+ "<|16.50|>": 51189,
428
+ "<|16.52|>": 51190,
429
+ "<|16.54|>": 51191,
430
+ "<|16.56|>": 51192,
431
+ "<|16.58|>": 51193,
432
+ "<|16.60|>": 51194,
433
+ "<|16.62|>": 51195,
434
+ "<|16.64|>": 51196,
435
+ "<|16.66|>": 51197,
436
+ "<|16.68|>": 51198,
437
+ "<|16.70|>": 51199,
438
+ "<|16.72|>": 51200,
439
+ "<|16.74|>": 51201,
440
+ "<|16.76|>": 51202,
441
+ "<|16.78|>": 51203,
442
+ "<|16.80|>": 51204,
443
+ "<|16.82|>": 51205,
444
+ "<|16.84|>": 51206,
445
+ "<|16.86|>": 51207,
446
+ "<|16.88|>": 51208,
447
+ "<|16.90|>": 51209,
448
+ "<|16.92|>": 51210,
449
+ "<|16.94|>": 51211,
450
+ "<|16.96|>": 51212,
451
+ "<|16.98|>": 51213,
452
+ "<|17.00|>": 51214,
453
+ "<|17.02|>": 51215,
454
+ "<|17.04|>": 51216,
455
+ "<|17.06|>": 51217,
456
+ "<|17.08|>": 51218,
457
+ "<|17.10|>": 51219,
458
+ "<|17.12|>": 51220,
459
+ "<|17.14|>": 51221,
460
+ "<|17.16|>": 51222,
461
+ "<|17.18|>": 51223,
462
+ "<|17.20|>": 51224,
463
+ "<|17.22|>": 51225,
464
+ "<|17.24|>": 51226,
465
+ "<|17.26|>": 51227,
466
+ "<|17.28|>": 51228,
467
+ "<|17.30|>": 51229,
468
+ "<|17.32|>": 51230,
469
+ "<|17.34|>": 51231,
470
+ "<|17.36|>": 51232,
471
+ "<|17.38|>": 51233,
472
+ "<|17.40|>": 51234,
473
+ "<|17.42|>": 51235,
474
+ "<|17.44|>": 51236,
475
+ "<|17.46|>": 51237,
476
+ "<|17.48|>": 51238,
477
+ "<|17.50|>": 51239,
478
+ "<|17.52|>": 51240,
479
+ "<|17.54|>": 51241,
480
+ "<|17.56|>": 51242,
481
+ "<|17.58|>": 51243,
482
+ "<|17.60|>": 51244,
483
+ "<|17.62|>": 51245,
484
+ "<|17.64|>": 51246,
485
+ "<|17.66|>": 51247,
486
+ "<|17.68|>": 51248,
487
+ "<|17.70|>": 51249,
488
+ "<|17.72|>": 51250,
489
+ "<|17.74|>": 51251,
490
+ "<|17.76|>": 51252,
491
+ "<|17.78|>": 51253,
492
+ "<|17.80|>": 51254,
493
+ "<|17.82|>": 51255,
494
+ "<|17.84|>": 51256,
495
+ "<|17.86|>": 51257,
496
+ "<|17.88|>": 51258,
497
+ "<|17.90|>": 51259,
498
+ "<|17.92|>": 51260,
499
+ "<|17.94|>": 51261,
500
+ "<|17.96|>": 51262,
501
+ "<|17.98|>": 51263,
502
+ "<|18.00|>": 51264,
503
+ "<|18.02|>": 51265,
504
+ "<|18.04|>": 51266,
505
+ "<|18.06|>": 51267,
506
+ "<|18.08|>": 51268,
507
+ "<|18.10|>": 51269,
508
+ "<|18.12|>": 51270,
509
+ "<|18.14|>": 51271,
510
+ "<|18.16|>": 51272,
511
+ "<|18.18|>": 51273,
512
+ "<|18.20|>": 51274,
513
+ "<|18.22|>": 51275,
514
+ "<|18.24|>": 51276,
515
+ "<|18.26|>": 51277,
516
+ "<|18.28|>": 51278,
517
+ "<|18.30|>": 51279,
518
+ "<|18.32|>": 51280,
519
+ "<|18.34|>": 51281,
520
+ "<|18.36|>": 51282,
521
+ "<|18.38|>": 51283,
522
+ "<|18.40|>": 51284,
523
+ "<|18.42|>": 51285,
524
+ "<|18.44|>": 51286,
525
+ "<|18.46|>": 51287,
526
+ "<|18.48|>": 51288,
527
+ "<|18.50|>": 51289,
528
+ "<|18.52|>": 51290,
529
+ "<|18.54|>": 51291,
530
+ "<|18.56|>": 51292,
531
+ "<|18.58|>": 51293,
532
+ "<|18.60|>": 51294,
533
+ "<|18.62|>": 51295,
534
+ "<|18.64|>": 51296,
535
+ "<|18.66|>": 51297,
536
+ "<|18.68|>": 51298,
537
+ "<|18.70|>": 51299,
538
+ "<|18.72|>": 51300,
539
+ "<|18.74|>": 51301,
540
+ "<|18.76|>": 51302,
541
+ "<|18.78|>": 51303,
542
+ "<|18.80|>": 51304,
543
+ "<|18.82|>": 51305,
544
+ "<|18.84|>": 51306,
545
+ "<|18.86|>": 51307,
546
+ "<|18.88|>": 51308,
547
+ "<|18.90|>": 51309,
548
+ "<|18.92|>": 51310,
549
+ "<|18.94|>": 51311,
550
+ "<|18.96|>": 51312,
551
+ "<|18.98|>": 51313,
552
+ "<|19.00|>": 51314,
553
+ "<|19.02|>": 51315,
554
+ "<|19.04|>": 51316,
555
+ "<|19.06|>": 51317,
556
+ "<|19.08|>": 51318,
557
+ "<|19.10|>": 51319,
558
+ "<|19.12|>": 51320,
559
+ "<|19.14|>": 51321,
560
+ "<|19.16|>": 51322,
561
+ "<|19.18|>": 51323,
562
+ "<|19.20|>": 51324,
563
+ "<|19.22|>": 51325,
564
+ "<|19.24|>": 51326,
565
+ "<|19.26|>": 51327,
566
+ "<|19.28|>": 51328,
567
+ "<|19.30|>": 51329,
568
+ "<|19.32|>": 51330,
569
+ "<|19.34|>": 51331,
570
+ "<|19.36|>": 51332,
571
+ "<|19.38|>": 51333,
572
+ "<|19.40|>": 51334,
573
+ "<|19.42|>": 51335,
574
+ "<|19.44|>": 51336,
575
+ "<|19.46|>": 51337,
576
+ "<|19.48|>": 51338,
577
+ "<|19.50|>": 51339,
578
+ "<|19.52|>": 51340,
579
+ "<|19.54|>": 51341,
580
+ "<|19.56|>": 51342,
581
+ "<|19.58|>": 51343,
582
+ "<|19.60|>": 51344,
583
+ "<|19.62|>": 51345,
584
+ "<|19.64|>": 51346,
585
+ "<|19.66|>": 51347,
586
+ "<|19.68|>": 51348,
587
+ "<|19.70|>": 51349,
588
+ "<|19.72|>": 51350,
589
+ "<|19.74|>": 51351,
590
+ "<|19.76|>": 51352,
591
+ "<|19.78|>": 51353,
592
+ "<|19.80|>": 51354,
593
+ "<|19.82|>": 51355,
594
+ "<|19.84|>": 51356,
595
+ "<|19.86|>": 51357,
596
+ "<|19.88|>": 51358,
597
+ "<|19.90|>": 51359,
598
+ "<|19.92|>": 51360,
599
+ "<|19.94|>": 51361,
600
+ "<|19.96|>": 51362,
601
+ "<|19.98|>": 51363,
602
+ "<|2.00|>": 50464,
603
+ "<|2.02|>": 50465,
604
+ "<|2.04|>": 50466,
605
+ "<|2.06|>": 50467,
606
+ "<|2.08|>": 50468,
607
+ "<|2.10|>": 50469,
608
+ "<|2.12|>": 50470,
609
+ "<|2.14|>": 50471,
610
+ "<|2.16|>": 50472,
611
+ "<|2.18|>": 50473,
612
+ "<|2.20|>": 50474,
613
+ "<|2.22|>": 50475,
614
+ "<|2.24|>": 50476,
615
+ "<|2.26|>": 50477,
616
+ "<|2.28|>": 50478,
617
+ "<|2.30|>": 50479,
618
+ "<|2.32|>": 50480,
619
+ "<|2.34|>": 50481,
620
+ "<|2.36|>": 50482,
621
+ "<|2.38|>": 50483,
622
+ "<|2.40|>": 50484,
623
+ "<|2.42|>": 50485,
624
+ "<|2.44|>": 50486,
625
+ "<|2.46|>": 50487,
626
+ "<|2.48|>": 50488,
627
+ "<|2.50|>": 50489,
628
+ "<|2.52|>": 50490,
629
+ "<|2.54|>": 50491,
630
+ "<|2.56|>": 50492,
631
+ "<|2.58|>": 50493,
632
+ "<|2.60|>": 50494,
633
+ "<|2.62|>": 50495,
634
+ "<|2.64|>": 50496,
635
+ "<|2.66|>": 50497,
636
+ "<|2.68|>": 50498,
637
+ "<|2.70|>": 50499,
638
+ "<|2.72|>": 50500,
639
+ "<|2.74|>": 50501,
640
+ "<|2.76|>": 50502,
641
+ "<|2.78|>": 50503,
642
+ "<|2.80|>": 50504,
643
+ "<|2.82|>": 50505,
644
+ "<|2.84|>": 50506,
645
+ "<|2.86|>": 50507,
646
+ "<|2.88|>": 50508,
647
+ "<|2.90|>": 50509,
648
+ "<|2.92|>": 50510,
649
+ "<|2.94|>": 50511,
650
+ "<|2.96|>": 50512,
651
+ "<|2.98|>": 50513,
652
+ "<|20.00|>": 51364,
653
+ "<|20.02|>": 51365,
654
+ "<|20.04|>": 51366,
655
+ "<|20.06|>": 51367,
656
+ "<|20.08|>": 51368,
657
+ "<|20.10|>": 51369,
658
+ "<|20.12|>": 51370,
659
+ "<|20.14|>": 51371,
660
+ "<|20.16|>": 51372,
661
+ "<|20.18|>": 51373,
662
+ "<|20.20|>": 51374,
663
+ "<|20.22|>": 51375,
664
+ "<|20.24|>": 51376,
665
+ "<|20.26|>": 51377,
666
+ "<|20.28|>": 51378,
667
+ "<|20.30|>": 51379,
668
+ "<|20.32|>": 51380,
669
+ "<|20.34|>": 51381,
670
+ "<|20.36|>": 51382,
671
+ "<|20.38|>": 51383,
672
+ "<|20.40|>": 51384,
673
+ "<|20.42|>": 51385,
674
+ "<|20.44|>": 51386,
675
+ "<|20.46|>": 51387,
676
+ "<|20.48|>": 51388,
677
+ "<|20.50|>": 51389,
678
+ "<|20.52|>": 51390,
679
+ "<|20.54|>": 51391,
680
+ "<|20.56|>": 51392,
681
+ "<|20.58|>": 51393,
682
+ "<|20.60|>": 51394,
683
+ "<|20.62|>": 51395,
684
+ "<|20.64|>": 51396,
685
+ "<|20.66|>": 51397,
686
+ "<|20.68|>": 51398,
687
+ "<|20.70|>": 51399,
688
+ "<|20.72|>": 51400,
689
+ "<|20.74|>": 51401,
690
+ "<|20.76|>": 51402,
691
+ "<|20.78|>": 51403,
692
+ "<|20.80|>": 51404,
693
+ "<|20.82|>": 51405,
694
+ "<|20.84|>": 51406,
695
+ "<|20.86|>": 51407,
696
+ "<|20.88|>": 51408,
697
+ "<|20.90|>": 51409,
698
+ "<|20.92|>": 51410,
699
+ "<|20.94|>": 51411,
700
+ "<|20.96|>": 51412,
701
+ "<|20.98|>": 51413,
702
+ "<|21.00|>": 51414,
703
+ "<|21.02|>": 51415,
704
+ "<|21.04|>": 51416,
705
+ "<|21.06|>": 51417,
706
+ "<|21.08|>": 51418,
707
+ "<|21.10|>": 51419,
708
+ "<|21.12|>": 51420,
709
+ "<|21.14|>": 51421,
710
+ "<|21.16|>": 51422,
711
+ "<|21.18|>": 51423,
712
+ "<|21.20|>": 51424,
713
+ "<|21.22|>": 51425,
714
+ "<|21.24|>": 51426,
715
+ "<|21.26|>": 51427,
716
+ "<|21.28|>": 51428,
717
+ "<|21.30|>": 51429,
718
+ "<|21.32|>": 51430,
719
+ "<|21.34|>": 51431,
720
+ "<|21.36|>": 51432,
721
+ "<|21.38|>": 51433,
722
+ "<|21.40|>": 51434,
723
+ "<|21.42|>": 51435,
724
+ "<|21.44|>": 51436,
725
+ "<|21.46|>": 51437,
726
+ "<|21.48|>": 51438,
727
+ "<|21.50|>": 51439,
728
+ "<|21.52|>": 51440,
729
+ "<|21.54|>": 51441,
730
+ "<|21.56|>": 51442,
731
+ "<|21.58|>": 51443,
732
+ "<|21.60|>": 51444,
733
+ "<|21.62|>": 51445,
734
+ "<|21.64|>": 51446,
735
+ "<|21.66|>": 51447,
736
+ "<|21.68|>": 51448,
737
+ "<|21.70|>": 51449,
738
+ "<|21.72|>": 51450,
739
+ "<|21.74|>": 51451,
740
+ "<|21.76|>": 51452,
741
+ "<|21.78|>": 51453,
742
+ "<|21.80|>": 51454,
743
+ "<|21.82|>": 51455,
744
+ "<|21.84|>": 51456,
745
+ "<|21.86|>": 51457,
746
+ "<|21.88|>": 51458,
747
+ "<|21.90|>": 51459,
748
+ "<|21.92|>": 51460,
749
+ "<|21.94|>": 51461,
750
+ "<|21.96|>": 51462,
751
+ "<|21.98|>": 51463,
752
+ "<|22.00|>": 51464,
753
+ "<|22.02|>": 51465,
754
+ "<|22.04|>": 51466,
755
+ "<|22.06|>": 51467,
756
+ "<|22.08|>": 51468,
757
+ "<|22.10|>": 51469,
758
+ "<|22.12|>": 51470,
759
+ "<|22.14|>": 51471,
760
+ "<|22.16|>": 51472,
761
+ "<|22.18|>": 51473,
762
+ "<|22.20|>": 51474,
763
+ "<|22.22|>": 51475,
764
+ "<|22.24|>": 51476,
765
+ "<|22.26|>": 51477,
766
+ "<|22.28|>": 51478,
767
+ "<|22.30|>": 51479,
768
+ "<|22.32|>": 51480,
769
+ "<|22.34|>": 51481,
770
+ "<|22.36|>": 51482,
771
+ "<|22.38|>": 51483,
772
+ "<|22.40|>": 51484,
773
+ "<|22.42|>": 51485,
774
+ "<|22.44|>": 51486,
775
+ "<|22.46|>": 51487,
776
+ "<|22.48|>": 51488,
777
+ "<|22.50|>": 51489,
778
+ "<|22.52|>": 51490,
779
+ "<|22.54|>": 51491,
780
+ "<|22.56|>": 51492,
781
+ "<|22.58|>": 51493,
782
+ "<|22.60|>": 51494,
783
+ "<|22.62|>": 51495,
784
+ "<|22.64|>": 51496,
785
+ "<|22.66|>": 51497,
786
+ "<|22.68|>": 51498,
787
+ "<|22.70|>": 51499,
788
+ "<|22.72|>": 51500,
789
+ "<|22.74|>": 51501,
790
+ "<|22.76|>": 51502,
791
+ "<|22.78|>": 51503,
792
+ "<|22.80|>": 51504,
793
+ "<|22.82|>": 51505,
794
+ "<|22.84|>": 51506,
795
+ "<|22.86|>": 51507,
796
+ "<|22.88|>": 51508,
797
+ "<|22.90|>": 51509,
798
+ "<|22.92|>": 51510,
799
+ "<|22.94|>": 51511,
800
+ "<|22.96|>": 51512,
801
+ "<|22.98|>": 51513,
802
+ "<|23.00|>": 51514,
803
+ "<|23.02|>": 51515,
804
+ "<|23.04|>": 51516,
805
+ "<|23.06|>": 51517,
806
+ "<|23.08|>": 51518,
807
+ "<|23.10|>": 51519,
808
+ "<|23.12|>": 51520,
809
+ "<|23.14|>": 51521,
810
+ "<|23.16|>": 51522,
811
+ "<|23.18|>": 51523,
812
+ "<|23.20|>": 51524,
813
+ "<|23.22|>": 51525,
814
+ "<|23.24|>": 51526,
815
+ "<|23.26|>": 51527,
816
+ "<|23.28|>": 51528,
817
+ "<|23.30|>": 51529,
818
+ "<|23.32|>": 51530,
819
+ "<|23.34|>": 51531,
820
+ "<|23.36|>": 51532,
821
+ "<|23.38|>": 51533,
822
+ "<|23.40|>": 51534,
823
+ "<|23.42|>": 51535,
824
+ "<|23.44|>": 51536,
825
+ "<|23.46|>": 51537,
826
+ "<|23.48|>": 51538,
827
+ "<|23.50|>": 51539,
828
+ "<|23.52|>": 51540,
829
+ "<|23.54|>": 51541,
830
+ "<|23.56|>": 51542,
831
+ "<|23.58|>": 51543,
832
+ "<|23.60|>": 51544,
833
+ "<|23.62|>": 51545,
834
+ "<|23.64|>": 51546,
835
+ "<|23.66|>": 51547,
836
+ "<|23.68|>": 51548,
837
+ "<|23.70|>": 51549,
838
+ "<|23.72|>": 51550,
839
+ "<|23.74|>": 51551,
840
+ "<|23.76|>": 51552,
841
+ "<|23.78|>": 51553,
842
+ "<|23.80|>": 51554,
843
+ "<|23.82|>": 51555,
844
+ "<|23.84|>": 51556,
845
+ "<|23.86|>": 51557,
846
+ "<|23.88|>": 51558,
847
+ "<|23.90|>": 51559,
848
+ "<|23.92|>": 51560,
849
+ "<|23.94|>": 51561,
850
+ "<|23.96|>": 51562,
851
+ "<|23.98|>": 51563,
852
+ "<|24.00|>": 51564,
853
+ "<|24.02|>": 51565,
854
+ "<|24.04|>": 51566,
855
+ "<|24.06|>": 51567,
856
+ "<|24.08|>": 51568,
857
+ "<|24.10|>": 51569,
858
+ "<|24.12|>": 51570,
859
+ "<|24.14|>": 51571,
860
+ "<|24.16|>": 51572,
861
+ "<|24.18|>": 51573,
862
+ "<|24.20|>": 51574,
863
+ "<|24.22|>": 51575,
864
+ "<|24.24|>": 51576,
865
+ "<|24.26|>": 51577,
866
+ "<|24.28|>": 51578,
867
+ "<|24.30|>": 51579,
868
+ "<|24.32|>": 51580,
869
+ "<|24.34|>": 51581,
870
+ "<|24.36|>": 51582,
871
+ "<|24.38|>": 51583,
872
+ "<|24.40|>": 51584,
873
+ "<|24.42|>": 51585,
874
+ "<|24.44|>": 51586,
875
+ "<|24.46|>": 51587,
876
+ "<|24.48|>": 51588,
877
+ "<|24.50|>": 51589,
878
+ "<|24.52|>": 51590,
879
+ "<|24.54|>": 51591,
880
+ "<|24.56|>": 51592,
881
+ "<|24.58|>": 51593,
882
+ "<|24.60|>": 51594,
883
+ "<|24.62|>": 51595,
884
+ "<|24.64|>": 51596,
885
+ "<|24.66|>": 51597,
886
+ "<|24.68|>": 51598,
887
+ "<|24.70|>": 51599,
888
+ "<|24.72|>": 51600,
889
+ "<|24.74|>": 51601,
890
+ "<|24.76|>": 51602,
891
+ "<|24.78|>": 51603,
892
+ "<|24.80|>": 51604,
893
+ "<|24.82|>": 51605,
894
+ "<|24.84|>": 51606,
895
+ "<|24.86|>": 51607,
896
+ "<|24.88|>": 51608,
897
+ "<|24.90|>": 51609,
898
+ "<|24.92|>": 51610,
899
+ "<|24.94|>": 51611,
900
+ "<|24.96|>": 51612,
901
+ "<|24.98|>": 51613,
902
+ "<|25.00|>": 51614,
903
+ "<|25.02|>": 51615,
904
+ "<|25.04|>": 51616,
905
+ "<|25.06|>": 51617,
906
+ "<|25.08|>": 51618,
907
+ "<|25.10|>": 51619,
908
+ "<|25.12|>": 51620,
909
+ "<|25.14|>": 51621,
910
+ "<|25.16|>": 51622,
911
+ "<|25.18|>": 51623,
912
+ "<|25.20|>": 51624,
913
+ "<|25.22|>": 51625,
914
+ "<|25.24|>": 51626,
915
+ "<|25.26|>": 51627,
916
+ "<|25.28|>": 51628,
917
+ "<|25.30|>": 51629,
918
+ "<|25.32|>": 51630,
919
+ "<|25.34|>": 51631,
920
+ "<|25.36|>": 51632,
921
+ "<|25.38|>": 51633,
922
+ "<|25.40|>": 51634,
923
+ "<|25.42|>": 51635,
924
+ "<|25.44|>": 51636,
925
+ "<|25.46|>": 51637,
926
+ "<|25.48|>": 51638,
927
+ "<|25.50|>": 51639,
928
+ "<|25.52|>": 51640,
929
+ "<|25.54|>": 51641,
930
+ "<|25.56|>": 51642,
931
+ "<|25.58|>": 51643,
932
+ "<|25.60|>": 51644,
933
+ "<|25.62|>": 51645,
934
+ "<|25.64|>": 51646,
935
+ "<|25.66|>": 51647,
936
+ "<|25.68|>": 51648,
937
+ "<|25.70|>": 51649,
938
+ "<|25.72|>": 51650,
939
+ "<|25.74|>": 51651,
940
+ "<|25.76|>": 51652,
941
+ "<|25.78|>": 51653,
942
+ "<|25.80|>": 51654,
943
+ "<|25.82|>": 51655,
944
+ "<|25.84|>": 51656,
945
+ "<|25.86|>": 51657,
946
+ "<|25.88|>": 51658,
947
+ "<|25.90|>": 51659,
948
+ "<|25.92|>": 51660,
949
+ "<|25.94|>": 51661,
950
+ "<|25.96|>": 51662,
951
+ "<|25.98|>": 51663,
952
+ "<|26.00|>": 51664,
953
+ "<|26.02|>": 51665,
954
+ "<|26.04|>": 51666,
955
+ "<|26.06|>": 51667,
956
+ "<|26.08|>": 51668,
957
+ "<|26.10|>": 51669,
958
+ "<|26.12|>": 51670,
959
+ "<|26.14|>": 51671,
960
+ "<|26.16|>": 51672,
961
+ "<|26.18|>": 51673,
962
+ "<|26.20|>": 51674,
963
+ "<|26.22|>": 51675,
964
+ "<|26.24|>": 51676,
965
+ "<|26.26|>": 51677,
966
+ "<|26.28|>": 51678,
967
+ "<|26.30|>": 51679,
968
+ "<|26.32|>": 51680,
969
+ "<|26.34|>": 51681,
970
+ "<|26.36|>": 51682,
971
+ "<|26.38|>": 51683,
972
+ "<|26.40|>": 51684,
973
+ "<|26.42|>": 51685,
974
+ "<|26.44|>": 51686,
975
+ "<|26.46|>": 51687,
976
+ "<|26.48|>": 51688,
977
+ "<|26.50|>": 51689,
978
+ "<|26.52|>": 51690,
979
+ "<|26.54|>": 51691,
980
+ "<|26.56|>": 51692,
981
+ "<|26.58|>": 51693,
982
+ "<|26.60|>": 51694,
983
+ "<|26.62|>": 51695,
984
+ "<|26.64|>": 51696,
985
+ "<|26.66|>": 51697,
986
+ "<|26.68|>": 51698,
987
+ "<|26.70|>": 51699,
988
+ "<|26.72|>": 51700,
989
+ "<|26.74|>": 51701,
990
+ "<|26.76|>": 51702,
991
+ "<|26.78|>": 51703,
992
+ "<|26.80|>": 51704,
993
+ "<|26.82|>": 51705,
994
+ "<|26.84|>": 51706,
995
+ "<|26.86|>": 51707,
996
+ "<|26.88|>": 51708,
997
+ "<|26.90|>": 51709,
998
+ "<|26.92|>": 51710,
999
+ "<|26.94|>": 51711,
1000
+ "<|26.96|>": 51712,
1001
+ "<|26.98|>": 51713,
1002
+ "<|27.00|>": 51714,
1003
+ "<|27.02|>": 51715,
1004
+ "<|27.04|>": 51716,
1005
+ "<|27.06|>": 51717,
1006
+ "<|27.08|>": 51718,
1007
+ "<|27.10|>": 51719,
1008
+ "<|27.12|>": 51720,
1009
+ "<|27.14|>": 51721,
1010
+ "<|27.16|>": 51722,
1011
+ "<|27.18|>": 51723,
1012
+ "<|27.20|>": 51724,
1013
+ "<|27.22|>": 51725,
1014
+ "<|27.24|>": 51726,
1015
+ "<|27.26|>": 51727,
1016
+ "<|27.28|>": 51728,
1017
+ "<|27.30|>": 51729,
1018
+ "<|27.32|>": 51730,
1019
+ "<|27.34|>": 51731,
1020
+ "<|27.36|>": 51732,
1021
+ "<|27.38|>": 51733,
1022
+ "<|27.40|>": 51734,
1023
+ "<|27.42|>": 51735,
1024
+ "<|27.44|>": 51736,
1025
+ "<|27.46|>": 51737,
1026
+ "<|27.48|>": 51738,
1027
+ "<|27.50|>": 51739,
1028
+ "<|27.52|>": 51740,
1029
+ "<|27.54|>": 51741,
1030
+ "<|27.56|>": 51742,
1031
+ "<|27.58|>": 51743,
1032
+ "<|27.60|>": 51744,
1033
+ "<|27.62|>": 51745,
1034
+ "<|27.64|>": 51746,
1035
+ "<|27.66|>": 51747,
1036
+ "<|27.68|>": 51748,
1037
+ "<|27.70|>": 51749,
1038
+ "<|27.72|>": 51750,
1039
+ "<|27.74|>": 51751,
1040
+ "<|27.76|>": 51752,
1041
+ "<|27.78|>": 51753,
1042
+ "<|27.80|>": 51754,
1043
+ "<|27.82|>": 51755,
1044
+ "<|27.84|>": 51756,
1045
+ "<|27.86|>": 51757,
1046
+ "<|27.88|>": 51758,
1047
+ "<|27.90|>": 51759,
1048
+ "<|27.92|>": 51760,
1049
+ "<|27.94|>": 51761,
1050
+ "<|27.96|>": 51762,
1051
+ "<|27.98|>": 51763,
1052
+ "<|28.00|>": 51764,
1053
+ "<|28.02|>": 51765,
1054
+ "<|28.04|>": 51766,
1055
+ "<|28.06|>": 51767,
1056
+ "<|28.08|>": 51768,
1057
+ "<|28.10|>": 51769,
1058
+ "<|28.12|>": 51770,
1059
+ "<|28.14|>": 51771,
1060
+ "<|28.16|>": 51772,
1061
+ "<|28.18|>": 51773,
1062
+ "<|28.20|>": 51774,
1063
+ "<|28.22|>": 51775,
1064
+ "<|28.24|>": 51776,
1065
+ "<|28.26|>": 51777,
1066
+ "<|28.28|>": 51778,
1067
+ "<|28.30|>": 51779,
1068
+ "<|28.32|>": 51780,
1069
+ "<|28.34|>": 51781,
1070
+ "<|28.36|>": 51782,
1071
+ "<|28.38|>": 51783,
1072
+ "<|28.40|>": 51784,
1073
+ "<|28.42|>": 51785,
1074
+ "<|28.44|>": 51786,
1075
+ "<|28.46|>": 51787,
1076
+ "<|28.48|>": 51788,
1077
+ "<|28.50|>": 51789,
1078
+ "<|28.52|>": 51790,
1079
+ "<|28.54|>": 51791,
1080
+ "<|28.56|>": 51792,
1081
+ "<|28.58|>": 51793,
1082
+ "<|28.60|>": 51794,
1083
+ "<|28.62|>": 51795,
1084
+ "<|28.64|>": 51796,
1085
+ "<|28.66|>": 51797,
1086
+ "<|28.68|>": 51798,
1087
+ "<|28.70|>": 51799,
1088
+ "<|28.72|>": 51800,
1089
+ "<|28.74|>": 51801,
1090
+ "<|28.76|>": 51802,
1091
+ "<|28.78|>": 51803,
1092
+ "<|28.80|>": 51804,
1093
+ "<|28.82|>": 51805,
1094
+ "<|28.84|>": 51806,
1095
+ "<|28.86|>": 51807,
1096
+ "<|28.88|>": 51808,
1097
+ "<|28.90|>": 51809,
1098
+ "<|28.92|>": 51810,
1099
+ "<|28.94|>": 51811,
1100
+ "<|28.96|>": 51812,
1101
+ "<|28.98|>": 51813,
1102
+ "<|29.00|>": 51814,
1103
+ "<|29.02|>": 51815,
1104
+ "<|29.04|>": 51816,
1105
+ "<|29.06|>": 51817,
1106
+ "<|29.08|>": 51818,
1107
+ "<|29.10|>": 51819,
1108
+ "<|29.12|>": 51820,
1109
+ "<|29.14|>": 51821,
1110
+ "<|29.16|>": 51822,
1111
+ "<|29.18|>": 51823,
1112
+ "<|29.20|>": 51824,
1113
+ "<|29.22|>": 51825,
1114
+ "<|29.24|>": 51826,
1115
+ "<|29.26|>": 51827,
1116
+ "<|29.28|>": 51828,
1117
+ "<|29.30|>": 51829,
1118
+ "<|29.32|>": 51830,
1119
+ "<|29.34|>": 51831,
1120
+ "<|29.36|>": 51832,
1121
+ "<|29.38|>": 51833,
1122
+ "<|29.40|>": 51834,
1123
+ "<|29.42|>": 51835,
1124
+ "<|29.44|>": 51836,
1125
+ "<|29.46|>": 51837,
1126
+ "<|29.48|>": 51838,
1127
+ "<|29.50|>": 51839,
1128
+ "<|29.52|>": 51840,
1129
+ "<|29.54|>": 51841,
1130
+ "<|29.56|>": 51842,
1131
+ "<|29.58|>": 51843,
1132
+ "<|29.60|>": 51844,
1133
+ "<|29.62|>": 51845,
1134
+ "<|29.64|>": 51846,
1135
+ "<|29.66|>": 51847,
1136
+ "<|29.68|>": 51848,
1137
+ "<|29.70|>": 51849,
1138
+ "<|29.72|>": 51850,
1139
+ "<|29.74|>": 51851,
1140
+ "<|29.76|>": 51852,
1141
+ "<|29.78|>": 51853,
1142
+ "<|29.80|>": 51854,
1143
+ "<|29.82|>": 51855,
1144
+ "<|29.84|>": 51856,
1145
+ "<|29.86|>": 51857,
1146
+ "<|29.88|>": 51858,
1147
+ "<|29.90|>": 51859,
1148
+ "<|29.92|>": 51860,
1149
+ "<|29.94|>": 51861,
1150
+ "<|29.96|>": 51862,
1151
+ "<|29.98|>": 51863,
1152
+ "<|3.00|>": 50514,
1153
+ "<|3.02|>": 50515,
1154
+ "<|3.04|>": 50516,
1155
+ "<|3.06|>": 50517,
1156
+ "<|3.08|>": 50518,
1157
+ "<|3.10|>": 50519,
1158
+ "<|3.12|>": 50520,
1159
+ "<|3.14|>": 50521,
1160
+ "<|3.16|>": 50522,
1161
+ "<|3.18|>": 50523,
1162
+ "<|3.20|>": 50524,
1163
+ "<|3.22|>": 50525,
1164
+ "<|3.24|>": 50526,
1165
+ "<|3.26|>": 50527,
1166
+ "<|3.28|>": 50528,
1167
+ "<|3.30|>": 50529,
1168
+ "<|3.32|>": 50530,
1169
+ "<|3.34|>": 50531,
1170
+ "<|3.36|>": 50532,
1171
+ "<|3.38|>": 50533,
1172
+ "<|3.40|>": 50534,
1173
+ "<|3.42|>": 50535,
1174
+ "<|3.44|>": 50536,
1175
+ "<|3.46|>": 50537,
1176
+ "<|3.48|>": 50538,
1177
+ "<|3.50|>": 50539,
1178
+ "<|3.52|>": 50540,
1179
+ "<|3.54|>": 50541,
1180
+ "<|3.56|>": 50542,
1181
+ "<|3.58|>": 50543,
1182
+ "<|3.60|>": 50544,
1183
+ "<|3.62|>": 50545,
1184
+ "<|3.64|>": 50546,
1185
+ "<|3.66|>": 50547,
1186
+ "<|3.68|>": 50548,
1187
+ "<|3.70|>": 50549,
1188
+ "<|3.72|>": 50550,
1189
+ "<|3.74|>": 50551,
1190
+ "<|3.76|>": 50552,
1191
+ "<|3.78|>": 50553,
1192
+ "<|3.80|>": 50554,
1193
+ "<|3.82|>": 50555,
1194
+ "<|3.84|>": 50556,
1195
+ "<|3.86|>": 50557,
1196
+ "<|3.88|>": 50558,
1197
+ "<|3.90|>": 50559,
1198
+ "<|3.92|>": 50560,
1199
+ "<|3.94|>": 50561,
1200
+ "<|3.96|>": 50562,
1201
+ "<|3.98|>": 50563,
1202
+ "<|30.00|>": 51864,
1203
+ "<|4.00|>": 50564,
1204
+ "<|4.02|>": 50565,
1205
+ "<|4.04|>": 50566,
1206
+ "<|4.06|>": 50567,
1207
+ "<|4.08|>": 50568,
1208
+ "<|4.10|>": 50569,
1209
+ "<|4.12|>": 50570,
1210
+ "<|4.14|>": 50571,
1211
+ "<|4.16|>": 50572,
1212
+ "<|4.18|>": 50573,
1213
+ "<|4.20|>": 50574,
1214
+ "<|4.22|>": 50575,
1215
+ "<|4.24|>": 50576,
1216
+ "<|4.26|>": 50577,
1217
+ "<|4.28|>": 50578,
1218
+ "<|4.30|>": 50579,
1219
+ "<|4.32|>": 50580,
1220
+ "<|4.34|>": 50581,
1221
+ "<|4.36|>": 50582,
1222
+ "<|4.38|>": 50583,
1223
+ "<|4.40|>": 50584,
1224
+ "<|4.42|>": 50585,
1225
+ "<|4.44|>": 50586,
1226
+ "<|4.46|>": 50587,
1227
+ "<|4.48|>": 50588,
1228
+ "<|4.50|>": 50589,
1229
+ "<|4.52|>": 50590,
1230
+ "<|4.54|>": 50591,
1231
+ "<|4.56|>": 50592,
1232
+ "<|4.58|>": 50593,
1233
+ "<|4.60|>": 50594,
1234
+ "<|4.62|>": 50595,
1235
+ "<|4.64|>": 50596,
1236
+ "<|4.66|>": 50597,
1237
+ "<|4.68|>": 50598,
1238
+ "<|4.70|>": 50599,
1239
+ "<|4.72|>": 50600,
1240
+ "<|4.74|>": 50601,
1241
+ "<|4.76|>": 50602,
1242
+ "<|4.78|>": 50603,
1243
+ "<|4.80|>": 50604,
1244
+ "<|4.82|>": 50605,
1245
+ "<|4.84|>": 50606,
1246
+ "<|4.86|>": 50607,
1247
+ "<|4.88|>": 50608,
1248
+ "<|4.90|>": 50609,
1249
+ "<|4.92|>": 50610,
1250
+ "<|4.94|>": 50611,
1251
+ "<|4.96|>": 50612,
1252
+ "<|4.98|>": 50613,
1253
+ "<|5.00|>": 50614,
1254
+ "<|5.02|>": 50615,
1255
+ "<|5.04|>": 50616,
1256
+ "<|5.06|>": 50617,
1257
+ "<|5.08|>": 50618,
1258
+ "<|5.10|>": 50619,
1259
+ "<|5.12|>": 50620,
1260
+ "<|5.14|>": 50621,
1261
+ "<|5.16|>": 50622,
1262
+ "<|5.18|>": 50623,
1263
+ "<|5.20|>": 50624,
1264
+ "<|5.22|>": 50625,
1265
+ "<|5.24|>": 50626,
1266
+ "<|5.26|>": 50627,
1267
+ "<|5.28|>": 50628,
1268
+ "<|5.30|>": 50629,
1269
+ "<|5.32|>": 50630,
1270
+ "<|5.34|>": 50631,
1271
+ "<|5.36|>": 50632,
1272
+ "<|5.38|>": 50633,
1273
+ "<|5.40|>": 50634,
1274
+ "<|5.42|>": 50635,
1275
+ "<|5.44|>": 50636,
1276
+ "<|5.46|>": 50637,
1277
+ "<|5.48|>": 50638,
1278
+ "<|5.50|>": 50639,
1279
+ "<|5.52|>": 50640,
1280
+ "<|5.54|>": 50641,
1281
+ "<|5.56|>": 50642,
1282
+ "<|5.58|>": 50643,
1283
+ "<|5.60|>": 50644,
1284
+ "<|5.62|>": 50645,
1285
+ "<|5.64|>": 50646,
1286
+ "<|5.66|>": 50647,
1287
+ "<|5.68|>": 50648,
1288
+ "<|5.70|>": 50649,
1289
+ "<|5.72|>": 50650,
1290
+ "<|5.74|>": 50651,
1291
+ "<|5.76|>": 50652,
1292
+ "<|5.78|>": 50653,
1293
+ "<|5.80|>": 50654,
1294
+ "<|5.82|>": 50655,
1295
+ "<|5.84|>": 50656,
1296
+ "<|5.86|>": 50657,
1297
+ "<|5.88|>": 50658,
1298
+ "<|5.90|>": 50659,
1299
+ "<|5.92|>": 50660,
1300
+ "<|5.94|>": 50661,
1301
+ "<|5.96|>": 50662,
1302
+ "<|5.98|>": 50663,
1303
+ "<|6.00|>": 50664,
1304
+ "<|6.02|>": 50665,
1305
+ "<|6.04|>": 50666,
1306
+ "<|6.06|>": 50667,
1307
+ "<|6.08|>": 50668,
1308
+ "<|6.10|>": 50669,
1309
+ "<|6.12|>": 50670,
1310
+ "<|6.14|>": 50671,
1311
+ "<|6.16|>": 50672,
1312
+ "<|6.18|>": 50673,
1313
+ "<|6.20|>": 50674,
1314
+ "<|6.22|>": 50675,
1315
+ "<|6.24|>": 50676,
1316
+ "<|6.26|>": 50677,
1317
+ "<|6.28|>": 50678,
1318
+ "<|6.30|>": 50679,
1319
+ "<|6.32|>": 50680,
1320
+ "<|6.34|>": 50681,
1321
+ "<|6.36|>": 50682,
1322
+ "<|6.38|>": 50683,
1323
+ "<|6.40|>": 50684,
1324
+ "<|6.42|>": 50685,
1325
+ "<|6.44|>": 50686,
1326
+ "<|6.46|>": 50687,
1327
+ "<|6.48|>": 50688,
1328
+ "<|6.50|>": 50689,
1329
+ "<|6.52|>": 50690,
1330
+ "<|6.54|>": 50691,
1331
+ "<|6.56|>": 50692,
1332
+ "<|6.58|>": 50693,
1333
+ "<|6.60|>": 50694,
1334
+ "<|6.62|>": 50695,
1335
+ "<|6.64|>": 50696,
1336
+ "<|6.66|>": 50697,
1337
+ "<|6.68|>": 50698,
1338
+ "<|6.70|>": 50699,
1339
+ "<|6.72|>": 50700,
1340
+ "<|6.74|>": 50701,
1341
+ "<|6.76|>": 50702,
1342
+ "<|6.78|>": 50703,
1343
+ "<|6.80|>": 50704,
1344
+ "<|6.82|>": 50705,
1345
+ "<|6.84|>": 50706,
1346
+ "<|6.86|>": 50707,
1347
+ "<|6.88|>": 50708,
1348
+ "<|6.90|>": 50709,
1349
+ "<|6.92|>": 50710,
1350
+ "<|6.94|>": 50711,
1351
+ "<|6.96|>": 50712,
1352
+ "<|6.98|>": 50713,
1353
+ "<|7.00|>": 50714,
1354
+ "<|7.02|>": 50715,
1355
+ "<|7.04|>": 50716,
1356
+ "<|7.06|>": 50717,
1357
+ "<|7.08|>": 50718,
1358
+ "<|7.10|>": 50719,
1359
+ "<|7.12|>": 50720,
1360
+ "<|7.14|>": 50721,
1361
+ "<|7.16|>": 50722,
1362
+ "<|7.18|>": 50723,
1363
+ "<|7.20|>": 50724,
1364
+ "<|7.22|>": 50725,
1365
+ "<|7.24|>": 50726,
1366
+ "<|7.26|>": 50727,
1367
+ "<|7.28|>": 50728,
1368
+ "<|7.30|>": 50729,
1369
+ "<|7.32|>": 50730,
1370
+ "<|7.34|>": 50731,
1371
+ "<|7.36|>": 50732,
1372
+ "<|7.38|>": 50733,
1373
+ "<|7.40|>": 50734,
1374
+ "<|7.42|>": 50735,
1375
+ "<|7.44|>": 50736,
1376
+ "<|7.46|>": 50737,
1377
+ "<|7.48|>": 50738,
1378
+ "<|7.50|>": 50739,
1379
+ "<|7.52|>": 50740,
1380
+ "<|7.54|>": 50741,
1381
+ "<|7.56|>": 50742,
1382
+ "<|7.58|>": 50743,
1383
+ "<|7.60|>": 50744,
1384
+ "<|7.62|>": 50745,
1385
+ "<|7.64|>": 50746,
1386
+ "<|7.66|>": 50747,
1387
+ "<|7.68|>": 50748,
1388
+ "<|7.70|>": 50749,
1389
+ "<|7.72|>": 50750,
1390
+ "<|7.74|>": 50751,
1391
+ "<|7.76|>": 50752,
1392
+ "<|7.78|>": 50753,
1393
+ "<|7.80|>": 50754,
1394
+ "<|7.82|>": 50755,
1395
+ "<|7.84|>": 50756,
1396
+ "<|7.86|>": 50757,
1397
+ "<|7.88|>": 50758,
1398
+ "<|7.90|>": 50759,
1399
+ "<|7.92|>": 50760,
1400
+ "<|7.94|>": 50761,
1401
+ "<|7.96|>": 50762,
1402
+ "<|7.98|>": 50763,
1403
+ "<|8.00|>": 50764,
1404
+ "<|8.02|>": 50765,
1405
+ "<|8.04|>": 50766,
1406
+ "<|8.06|>": 50767,
1407
+ "<|8.08|>": 50768,
1408
+ "<|8.10|>": 50769,
1409
+ "<|8.12|>": 50770,
1410
+ "<|8.14|>": 50771,
1411
+ "<|8.16|>": 50772,
1412
+ "<|8.18|>": 50773,
1413
+ "<|8.20|>": 50774,
1414
+ "<|8.22|>": 50775,
1415
+ "<|8.24|>": 50776,
1416
+ "<|8.26|>": 50777,
1417
+ "<|8.28|>": 50778,
1418
+ "<|8.30|>": 50779,
1419
+ "<|8.32|>": 50780,
1420
+ "<|8.34|>": 50781,
1421
+ "<|8.36|>": 50782,
1422
+ "<|8.38|>": 50783,
1423
+ "<|8.40|>": 50784,
1424
+ "<|8.42|>": 50785,
1425
+ "<|8.44|>": 50786,
1426
+ "<|8.46|>": 50787,
1427
+ "<|8.48|>": 50788,
1428
+ "<|8.50|>": 50789,
1429
+ "<|8.52|>": 50790,
1430
+ "<|8.54|>": 50791,
1431
+ "<|8.56|>": 50792,
1432
+ "<|8.58|>": 50793,
1433
+ "<|8.60|>": 50794,
1434
+ "<|8.62|>": 50795,
1435
+ "<|8.64|>": 50796,
1436
+ "<|8.66|>": 50797,
1437
+ "<|8.68|>": 50798,
1438
+ "<|8.70|>": 50799,
1439
+ "<|8.72|>": 50800,
1440
+ "<|8.74|>": 50801,
1441
+ "<|8.76|>": 50802,
1442
+ "<|8.78|>": 50803,
1443
+ "<|8.80|>": 50804,
1444
+ "<|8.82|>": 50805,
1445
+ "<|8.84|>": 50806,
1446
+ "<|8.86|>": 50807,
1447
+ "<|8.88|>": 50808,
1448
+ "<|8.90|>": 50809,
1449
+ "<|8.92|>": 50810,
1450
+ "<|8.94|>": 50811,
1451
+ "<|8.96|>": 50812,
1452
+ "<|8.98|>": 50813,
1453
+ "<|9.00|>": 50814,
1454
+ "<|9.02|>": 50815,
1455
+ "<|9.04|>": 50816,
1456
+ "<|9.06|>": 50817,
1457
+ "<|9.08|>": 50818,
1458
+ "<|9.10|>": 50819,
1459
+ "<|9.12|>": 50820,
1460
+ "<|9.14|>": 50821,
1461
+ "<|9.16|>": 50822,
1462
+ "<|9.18|>": 50823,
1463
+ "<|9.20|>": 50824,
1464
+ "<|9.22|>": 50825,
1465
+ "<|9.24|>": 50826,
1466
+ "<|9.26|>": 50827,
1467
+ "<|9.28|>": 50828,
1468
+ "<|9.30|>": 50829,
1469
+ "<|9.32|>": 50830,
1470
+ "<|9.34|>": 50831,
1471
+ "<|9.36|>": 50832,
1472
+ "<|9.38|>": 50833,
1473
+ "<|9.40|>": 50834,
1474
+ "<|9.42|>": 50835,
1475
+ "<|9.44|>": 50836,
1476
+ "<|9.46|>": 50837,
1477
+ "<|9.48|>": 50838,
1478
+ "<|9.50|>": 50839,
1479
+ "<|9.52|>": 50840,
1480
+ "<|9.54|>": 50841,
1481
+ "<|9.56|>": 50842,
1482
+ "<|9.58|>": 50843,
1483
+ "<|9.60|>": 50844,
1484
+ "<|9.62|>": 50845,
1485
+ "<|9.64|>": 50846,
1486
+ "<|9.66|>": 50847,
1487
+ "<|9.68|>": 50848,
1488
+ "<|9.70|>": 50849,
1489
+ "<|9.72|>": 50850,
1490
+ "<|9.74|>": 50851,
1491
+ "<|9.76|>": 50852,
1492
+ "<|9.78|>": 50853,
1493
+ "<|9.80|>": 50854,
1494
+ "<|9.82|>": 50855,
1495
+ "<|9.84|>": 50856,
1496
+ "<|9.86|>": 50857,
1497
+ "<|9.88|>": 50858,
1498
+ "<|9.90|>": 50859,
1499
+ "<|9.92|>": 50860,
1500
+ "<|9.94|>": 50861,
1501
+ "<|9.96|>": 50862,
1502
+ "<|9.98|>": 50863,
1503
+ "<|af|>": 50327,
1504
+ "<|am|>": 50334,
1505
+ "<|ar|>": 50272,
1506
+ "<|as|>": 50350,
1507
+ "<|az|>": 50304,
1508
+ "<|ba|>": 50355,
1509
+ "<|be|>": 50330,
1510
+ "<|bg|>": 50292,
1511
+ "<|bn|>": 50302,
1512
+ "<|bo|>": 50347,
1513
+ "<|br|>": 50309,
1514
+ "<|bs|>": 50315,
1515
+ "<|ca|>": 50270,
1516
+ "<|cs|>": 50283,
1517
+ "<|cy|>": 50297,
1518
+ "<|da|>": 50285,
1519
+ "<|de|>": 50261,
1520
+ "<|el|>": 50281,
1521
+ "<|en|>": 50259,
1522
+ "<|es|>": 50262,
1523
+ "<|et|>": 50307,
1524
+ "<|eu|>": 50310,
1525
+ "<|fa|>": 50300,
1526
+ "<|fi|>": 50277,
1527
+ "<|fo|>": 50338,
1528
+ "<|fr|>": 50265,
1529
+ "<|gl|>": 50319,
1530
+ "<|gu|>": 50333,
1531
+ "<|haw|>": 50352,
1532
+ "<|ha|>": 50354,
1533
+ "<|he|>": 50279,
1534
+ "<|hi|>": 50276,
1535
+ "<|hr|>": 50291,
1536
+ "<|ht|>": 50339,
1537
+ "<|hu|>": 50286,
1538
+ "<|hy|>": 50312,
1539
+ "<|id|>": 50275,
1540
+ "<|is|>": 50311,
1541
+ "<|it|>": 50274,
1542
+ "<|ja|>": 50266,
1543
+ "<|jw|>": 50356,
1544
+ "<|ka|>": 50329,
1545
+ "<|kk|>": 50316,
1546
+ "<|km|>": 50323,
1547
+ "<|kn|>": 50306,
1548
+ "<|ko|>": 50264,
1549
+ "<|la|>": 50294,
1550
+ "<|lb|>": 50345,
1551
+ "<|ln|>": 50353,
1552
+ "<|lo|>": 50336,
1553
+ "<|lt|>": 50293,
1554
+ "<|lv|>": 50301,
1555
+ "<|mg|>": 50349,
1556
+ "<|mi|>": 50295,
1557
+ "<|mk|>": 50308,
1558
+ "<|ml|>": 50296,
1559
+ "<|mn|>": 50314,
1560
+ "<|mr|>": 50320,
1561
+ "<|ms|>": 50282,
1562
+ "<|mt|>": 50343,
1563
+ "<|my|>": 50346,
1564
+ "<|ne|>": 50313,
1565
+ "<|nl|>": 50271,
1566
+ "<|nn|>": 50342,
1567
+ "<|nocaptions|>": 50362,
1568
+ "<|notimestamps|>": 50363,
1569
+ "<|no|>": 50288,
1570
+ "<|oc|>": 50328,
1571
+ "<|pa|>": 50321,
1572
+ "<|pl|>": 50269,
1573
+ "<|ps|>": 50340,
1574
+ "<|pt|>": 50267,
1575
+ "<|ro|>": 50284,
1576
+ "<|ru|>": 50263,
1577
+ "<|sa|>": 50344,
1578
+ "<|sd|>": 50332,
1579
+ "<|si|>": 50322,
1580
+ "<|sk|>": 50298,
1581
+ "<|sl|>": 50305,
1582
+ "<|sn|>": 50324,
1583
+ "<|so|>": 50326,
1584
+ "<|sq|>": 50317,
1585
+ "<|sr|>": 50303,
1586
+ "<|startoflm|>": 50360,
1587
+ "<|startofprev|>": 50361,
1588
+ "<|startoftranscript|>": 50258,
1589
+ "<|su|>": 50357,
1590
+ "<|sv|>": 50273,
1591
+ "<|sw|>": 50318,
1592
+ "<|ta|>": 50287,
1593
+ "<|te|>": 50299,
1594
+ "<|tg|>": 50331,
1595
+ "<|th|>": 50289,
1596
+ "<|tk|>": 50341,
1597
+ "<|tl|>": 50348,
1598
+ "<|transcribe|>": 50359,
1599
+ "<|translate|>": 50358,
1600
+ "<|tr|>": 50268,
1601
+ "<|tt|>": 50351,
1602
+ "<|uk|>": 50280,
1603
+ "<|ur|>": 50290,
1604
+ "<|uz|>": 50337,
1605
+ "<|vi|>": 50278,
1606
+ "<|yi|>": 50335,
1607
+ "<|yo|>": 50325,
1608
+ "<|zh|>": 50260
1609
+ }
checkpoint-1000-epoch-1/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a6a14d82af0a1f21c72784c5e525a9bd893757635570328d71ff3f51ccbeed46
+ size 290403936
checkpoint-1000-epoch-1/model_1.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3ebbfa67ed17e8d4731d4904c8b72bb4c347e491079f837135848fc5921cf714
+ size 1527827760
checkpoint-1000-epoch-1/optimizer.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e6e6a9733d53f7055f0aabfe06dea59b696224803fbdf173ab61b9c5bc59774
+ size 414315002
checkpoint-1000-epoch-1/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:faf55774fc9d96efa5f77b392c5e764b5682bd3563351153421507b3104021c1
+ size 14344
checkpoint-1000-epoch-1/scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7b5cc1ed35f48cc59384f56c8de34e461d08b0f64957aa14f6f8878a25fc4550
+ size 988
checkpoint-1000-epoch-1/scheduler.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c7d9a456a4ef4407b1335101c008a172ceb633b64c0a17bc1c26892624f59b74
+ size 1064
common_voice_16_1_ru_pseudo_labelled/dataset_dict.json ADDED
@@ -0,0 +1 @@
+ {"splits": ["train", "validation", "test"]}
common_voice_16_1_ru_pseudo_labelled/distil-whisper/events.out.tfevents.1712401565.train02.3718604.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5ebcefbc9c607b8be793477dcb7506ac85da17198e7707423a36394e10dde377
+ size 88
common_voice_16_1_ru_pseudo_labelled/distil-whisper/events.out.tfevents.1712401615.train02.3718686.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:159da1d10980f8b1838724cd6752c8457b23222da1e5efa0b02f07c59519f383
+ size 88
common_voice_16_1_ru_pseudo_labelled/distil-whisper/events.out.tfevents.1712402554.train02.3719243.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c5b309c5db9330902724f78870dc5f56401f6783f1834120e603cc2503c47826
+ size 88
common_voice_16_1_ru_pseudo_labelled/distil-whisper/events.out.tfevents.1712402791.train02.3719643.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8aa2b2bbce0d6b9c73776887a9377b57c1e47e46481f9f034a72f76f96c86025
+ size 88
common_voice_16_1_ru_pseudo_labelled/distil-whisper/events.out.tfevents.1712402838.train02.3720027.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:82ade683d71eb999be3585d981244c95e024460f69e4a5bb25f9a398e754c1a7
+ size 278
common_voice_16_1_ru_pseudo_labelled/test-transcription.csv ADDED
The diff for this file is too large to render. See raw diff
common_voice_16_1_ru_pseudo_labelled/test/data-00000-of-00008.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b54a7e0fe4128da0e00076e3b4e7618c546d0b1dc127ee2b0fafbfa2448ee186
+ size 227861264
common_voice_16_1_ru_pseudo_labelled/test/data-00001-of-00008.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:942d41d757ca538c0dff0e981fa66593a5dfeb22a1f437cd7d625cd72c2f63d7
+ size 226518912
common_voice_16_1_ru_pseudo_labelled/test/data-00002-of-00008.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ada1971f117665401c187b473a9fd9c31a425c8866c3fd6d1c4e3e609b961ee5
+ size 228069912
common_voice_16_1_ru_pseudo_labelled/test/data-00003-of-00008.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5451e1f05f4c5e73ac17535bd136ed9478944ce4a233de68dfd162488dd30155
+ size 226867232
common_voice_16_1_ru_pseudo_labelled/test/data-00004-of-00008.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:44c9a831f2ace64e02575c25be084146a177d0db32b37b6d8ecc0f8db95866de
+ size 228381176
common_voice_16_1_ru_pseudo_labelled/test/data-00005-of-00008.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8a5c89a578be64c438485f76fd6b46d71482a328c77a7f45e717e64f98b3bf46
+ size 226157808
common_voice_16_1_ru_pseudo_labelled/test/data-00006-of-00008.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:709edcfa38745ebcc3fc50f6976e13aabf68f1f868b2caac9a98a8ab72db93d9
+ size 228389768
common_voice_16_1_ru_pseudo_labelled/test/data-00007-of-00008.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0e864d947e7da086fc30ee83fe4a36d8544350d488f7632e21417fb98a5b9d88
+ size 226235592
common_voice_16_1_ru_pseudo_labelled/test/dataset_info.json ADDED
@@ -0,0 +1,121 @@
1
+ {
2
+ "builder_name": "common_voice_16_1",
3
+ "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n",
4
+ "config_name": "ru",
5
+ "dataset_name": "common_voice_16_1",
6
+ "dataset_size": 44414679,
7
+ "description": "Common Voice is Mozilla's initiative to help teach machines how real people speak. The dataset currently consists of 19915 validated hours of speech in 120 languages, but more voices and languages are always added.",
8
+ "download_checksums": {
9
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/n_shards.json": {
10
+ "num_bytes": 14607,
11
+ "checksum": null
12
+ },
13
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/audio/ru/train/ru_train_0.tar": {
14
+ "num_bytes": 975953920,
15
+ "checksum": null
16
+ },
17
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/audio/ru/dev/ru_dev_0.tar": {
18
+ "num_bytes": 392980480,
19
+ "checksum": null
20
+ },
21
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/audio/ru/test/ru_test_0.tar": {
22
+ "num_bytes": 396625920,
23
+ "checksum": null
24
+ },
25
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/audio/ru/other/ru_other_0.tar": {
26
+ "num_bytes": 430479360,
27
+ "checksum": null
28
+ },
29
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/audio/ru/invalidated/ru_invalidated_0.tar": {
30
+ "num_bytes": 382126080,
31
+ "checksum": null
32
+ },
33
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/transcript/ru/train.tsv": {
34
+ "num_bytes": 7900935,
35
+ "checksum": null
36
+ },
37
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/transcript/ru/dev.tsv": {
38
+ "num_bytes": 2994516,
39
+ "checksum": null
40
+ },
41
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/transcript/ru/test.tsv": {
42
+ "num_bytes": 2930494,
43
+ "checksum": null
44
+ },
45
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/transcript/ru/other.tsv": {
46
+ "num_bytes": 3734943,
47
+ "checksum": null
48
+ },
49
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/transcript/ru/invalidated.tsv": {
50
+ "num_bytes": 3034902,
51
+ "checksum": null
52
+ }
53
+ },
54
+ "download_size": 2598776157,
55
+ "features": {
56
+ "path": {
57
+ "dtype": "string",
58
+ "_type": "Value"
59
+ },
60
+ "audio": {
61
+ "sampling_rate": 16000,
62
+ "_type": "Audio"
63
+ },
64
+ "sentence": {
65
+ "dtype": "string",
66
+ "_type": "Value"
67
+ },
68
+ "condition_on_prev": {
69
+ "feature": {
70
+ "dtype": "int64",
71
+ "_type": "Value"
72
+ },
73
+ "_type": "Sequence"
74
+ },
75
+ "whisper_transcript": {
76
+ "dtype": "string",
77
+ "_type": "Value"
78
+ }
79
+ },
80
+ "homepage": "https://commonvoice.mozilla.org/en/datasets",
81
+ "license": "https://creativecommons.org/publicdomain/zero/1.0/",
82
+ "size_in_bytes": 2643190836,
83
+ "splits": {
84
+ "train": {
85
+ "name": "train",
86
+ "num_bytes": 16997252,
87
+ "num_examples": 26359,
88
+ "dataset_name": "common_voice_16_1"
89
+ },
90
+ "validation": {
91
+ "name": "validation",
92
+ "num_bytes": 6473223,
93
+ "num_examples": 10198,
94
+ "dataset_name": "common_voice_16_1"
95
+ },
96
+ "test": {
97
+ "name": "test",
98
+ "num_bytes": 6429932,
99
+ "num_examples": 10199,
100
+ "dataset_name": "common_voice_16_1"
101
+ },
102
+ "other": {
103
+ "name": "other",
104
+ "num_bytes": 7941931,
105
+ "num_examples": 12190,
106
+ "dataset_name": "common_voice_16_1"
107
+ },
108
+ "invalidated": {
109
+ "name": "invalidated",
110
+ "num_bytes": 6572341,
111
+ "num_examples": 9906,
112
+ "dataset_name": "common_voice_16_1"
113
+ }
114
+ },
115
+ "version": {
116
+ "version_str": "16.1.0",
117
+ "major": 16,
118
+ "minor": 1,
119
+ "patch": 0
120
+ }
121
+ }
common_voice_16_1_ru_pseudo_labelled/test/state.json ADDED
@@ -0,0 +1,34 @@
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00008.arrow"
5
+ },
6
+ {
7
+ "filename": "data-00001-of-00008.arrow"
8
+ },
9
+ {
10
+ "filename": "data-00002-of-00008.arrow"
11
+ },
12
+ {
13
+ "filename": "data-00003-of-00008.arrow"
14
+ },
15
+ {
16
+ "filename": "data-00004-of-00008.arrow"
17
+ },
18
+ {
19
+ "filename": "data-00005-of-00008.arrow"
20
+ },
21
+ {
22
+ "filename": "data-00006-of-00008.arrow"
23
+ },
24
+ {
25
+ "filename": "data-00007-of-00008.arrow"
26
+ }
27
+ ],
28
+ "_fingerprint": "cd4e983fa32b0be9",
29
+ "_format_columns": null,
30
+ "_format_kwargs": {},
31
+ "_format_type": null,
32
+ "_output_all_columns": false,
33
+ "_split": null
34
+ }
common_voice_16_1_ru_pseudo_labelled/train-transcription.csv ADDED
The diff for this file is too large to render. See raw diff
common_voice_16_1_ru_pseudo_labelled/train/data-00000-of-00009.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d23c4c3f09d66293edb9944829d8a08527399878dea7de29b5f266263a86ce77
+ size 479764376
common_voice_16_1_ru_pseudo_labelled/train/data-00001-of-00009.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5da599ccad1f457a30c798a2776db8e42ed36f5407313eedc38176adbdf4975c
+ size 481844912
common_voice_16_1_ru_pseudo_labelled/train/data-00002-of-00009.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8ab8106459a8e72da743ed18a3efcbcdf21fb8e9e74b6c17e0ad6a9a665de6b2
+ size 476891608
common_voice_16_1_ru_pseudo_labelled/train/data-00003-of-00009.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5fc57ac070725f989f62e779811dd15ef7448ae8db5246eab7f9d860f81b74f4
+ size 481284128
common_voice_16_1_ru_pseudo_labelled/train/data-00004-of-00009.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:48e35bb858ce767ef1123cf229b2dc5cedb389cb2d29eeb5d7d2ed763de5ac1b
+ size 483754064
common_voice_16_1_ru_pseudo_labelled/train/data-00005-of-00009.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:03a43b72863d774d3ad61f8615e50634edf6363eba3c131b3d46dcad3eb65151
+ size 482936904
common_voice_16_1_ru_pseudo_labelled/train/data-00006-of-00009.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:16663003ef8af3820eccb7b20354da95a1bc9e4254c9591434c44cce7b474c4a
+ size 481316296
common_voice_16_1_ru_pseudo_labelled/train/data-00007-of-00009.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6d756b149d493b5da31256dcb4535cecb72b033574f5cb97274a3971fc07539c
+ size 484716280
common_voice_16_1_ru_pseudo_labelled/train/data-00008-of-00009.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:602e569820c9d2410d7f643bc9a7bce0d01924c2f2527b70a440738cd14ae6f3
+ size 480744040
common_voice_16_1_ru_pseudo_labelled/train/dataset_info.json ADDED
@@ -0,0 +1,121 @@
1
+ {
2
+ "builder_name": "common_voice_16_1",
3
+ "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n",
4
+ "config_name": "ru",
5
+ "dataset_name": "common_voice_16_1",
6
+ "dataset_size": 44414679,
7
+ "description": "Common Voice is Mozilla's initiative to help teach machines how real people speak. The dataset currently consists of 19915 validated hours of speech in 120 languages, but more voices and languages are always added.",
8
+ "download_checksums": {
9
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/n_shards.json": {
10
+ "num_bytes": 14607,
11
+ "checksum": null
12
+ },
13
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/audio/ru/train/ru_train_0.tar": {
14
+ "num_bytes": 975953920,
15
+ "checksum": null
16
+ },
17
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/audio/ru/dev/ru_dev_0.tar": {
18
+ "num_bytes": 392980480,
19
+ "checksum": null
20
+ },
21
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/audio/ru/test/ru_test_0.tar": {
22
+ "num_bytes": 396625920,
23
+ "checksum": null
24
+ },
25
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/audio/ru/other/ru_other_0.tar": {
26
+ "num_bytes": 430479360,
27
+ "checksum": null
28
+ },
29
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/audio/ru/invalidated/ru_invalidated_0.tar": {
30
+ "num_bytes": 382126080,
31
+ "checksum": null
32
+ },
33
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/transcript/ru/train.tsv": {
34
+ "num_bytes": 7900935,
35
+ "checksum": null
36
+ },
37
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/transcript/ru/dev.tsv": {
38
+ "num_bytes": 2994516,
39
+ "checksum": null
40
+ },
41
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/transcript/ru/test.tsv": {
42
+ "num_bytes": 2930494,
43
+ "checksum": null
44
+ },
45
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/transcript/ru/other.tsv": {
46
+ "num_bytes": 3734943,
47
+ "checksum": null
48
+ },
49
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/transcript/ru/invalidated.tsv": {
50
+ "num_bytes": 3034902,
51
+ "checksum": null
52
+ }
53
+ },
54
+ "download_size": 2598776157,
55
+ "features": {
56
+ "path": {
57
+ "dtype": "string",
58
+ "_type": "Value"
59
+ },
60
+ "audio": {
61
+ "sampling_rate": 16000,
62
+ "_type": "Audio"
63
+ },
64
+ "sentence": {
65
+ "dtype": "string",
66
+ "_type": "Value"
67
+ },
68
+ "condition_on_prev": {
69
+ "feature": {
70
+ "dtype": "int64",
71
+ "_type": "Value"
72
+ },
73
+ "_type": "Sequence"
74
+ },
75
+ "whisper_transcript": {
76
+ "dtype": "string",
77
+ "_type": "Value"
78
+ }
79
+ },
80
+ "homepage": "https://commonvoice.mozilla.org/en/datasets",
81
+ "license": "https://creativecommons.org/publicdomain/zero/1.0/",
82
+ "size_in_bytes": 2643190836,
83
+ "splits": {
84
+ "train": {
85
+ "name": "train",
86
+ "num_bytes": 16997252,
87
+ "num_examples": 26359,
88
+ "dataset_name": "common_voice_16_1"
89
+ },
90
+ "validation": {
91
+ "name": "validation",
92
+ "num_bytes": 6473223,
93
+ "num_examples": 10198,
94
+ "dataset_name": "common_voice_16_1"
95
+ },
96
+ "test": {
97
+ "name": "test",
98
+ "num_bytes": 6429932,
99
+ "num_examples": 10199,
100
+ "dataset_name": "common_voice_16_1"
101
+ },
102
+ "other": {
103
+ "name": "other",
104
+ "num_bytes": 7941931,
105
+ "num_examples": 12190,
106
+ "dataset_name": "common_voice_16_1"
107
+ },
108
+ "invalidated": {
109
+ "name": "invalidated",
110
+ "num_bytes": 6572341,
111
+ "num_examples": 9906,
112
+ "dataset_name": "common_voice_16_1"
113
+ }
114
+ },
115
+ "version": {
116
+ "version_str": "16.1.0",
117
+ "major": 16,
118
+ "minor": 1,
119
+ "patch": 0
120
+ }
121
+ }
common_voice_16_1_ru_pseudo_labelled/train/state.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00009.arrow"
5
+ },
6
+ {
7
+ "filename": "data-00001-of-00009.arrow"
8
+ },
9
+ {
10
+ "filename": "data-00002-of-00009.arrow"
11
+ },
12
+ {
13
+ "filename": "data-00003-of-00009.arrow"
14
+ },
15
+ {
16
+ "filename": "data-00004-of-00009.arrow"
17
+ },
18
+ {
19
+ "filename": "data-00005-of-00009.arrow"
20
+ },
21
+ {
22
+ "filename": "data-00006-of-00009.arrow"
23
+ },
24
+ {
25
+ "filename": "data-00007-of-00009.arrow"
26
+ },
27
+ {
28
+ "filename": "data-00008-of-00009.arrow"
29
+ }
30
+ ],
31
+ "_fingerprint": "233a9033860f5984",
32
+ "_format_columns": null,
33
+ "_format_kwargs": {},
34
+ "_format_type": null,
35
+ "_output_all_columns": false,
36
+ "_split": null
37
+ }
common_voice_16_1_ru_pseudo_labelled/validation-transcription.csv ADDED
The diff for this file is too large to render. See raw diff
common_voice_16_1_ru_pseudo_labelled/validation/data-00000-of-00008.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8bd5889556faf53cc2ac3cd2e509874ae43fdfff94de17e601f21c72b725427e
+ size 219095424
common_voice_16_1_ru_pseudo_labelled/validation/data-00001-of-00008.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e08e9a089d8b666c0b307593e9f43fd67f28feafde5d4c53697c6ac839ab5509
+ size 221546128
common_voice_16_1_ru_pseudo_labelled/validation/data-00002-of-00008.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cbd4dcf27b23ead8a0e82d81e295810508a8c53d43d37bbaaece8f4ddbce1088
+ size 218369688
common_voice_16_1_ru_pseudo_labelled/validation/data-00003-of-00008.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be63fcbcfd27bc11c7441ccc4a57aeb35236543b7a3f06e7d5db4af7ff99e3ed
+ size 220334584
common_voice_16_1_ru_pseudo_labelled/validation/data-00004-of-00008.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e865856feb74a712066c5424476c4a6b29e81f845be2b5e8b0e4a1a5ef65464d
+ size 217380464
common_voice_16_1_ru_pseudo_labelled/validation/data-00005-of-00008.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:741d669e1a54730b439f95b15d05155b72bffc5b4ca7a1e2aa82a0d67b8c22b0
+ size 221360016
common_voice_16_1_ru_pseudo_labelled/validation/data-00006-of-00008.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:91283ab84abef71ebef48112fad017718c63d1fac4728efb2a1cbd35546150e1
+ size 220875280
common_voice_16_1_ru_pseudo_labelled/validation/data-00007-of-00008.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a19135b93c17d3b51cdb53c043b4d6d555655e78b9388f2db969a570e1f5de7f
+ size 221241816
common_voice_16_1_ru_pseudo_labelled/validation/dataset_info.json ADDED
@@ -0,0 +1,121 @@
1
+ {
2
+ "builder_name": "common_voice_16_1",
3
+ "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n",
4
+ "config_name": "ru",
5
+ "dataset_name": "common_voice_16_1",
6
+ "dataset_size": 44414679,
7
+ "description": "Common Voice is Mozilla's initiative to help teach machines how real people speak. The dataset currently consists of 19915 validated hours of speech in 120 languages, but more voices and languages are always added.",
8
+ "download_checksums": {
9
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/n_shards.json": {
10
+ "num_bytes": 14607,
11
+ "checksum": null
12
+ },
13
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/audio/ru/train/ru_train_0.tar": {
14
+ "num_bytes": 975953920,
15
+ "checksum": null
16
+ },
17
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/audio/ru/dev/ru_dev_0.tar": {
18
+ "num_bytes": 392980480,
19
+ "checksum": null
20
+ },
21
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/audio/ru/test/ru_test_0.tar": {
22
+ "num_bytes": 396625920,
23
+ "checksum": null
24
+ },
25
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/audio/ru/other/ru_other_0.tar": {
26
+ "num_bytes": 430479360,
27
+ "checksum": null
28
+ },
29
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/audio/ru/invalidated/ru_invalidated_0.tar": {
30
+ "num_bytes": 382126080,
31
+ "checksum": null
32
+ },
33
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/transcript/ru/train.tsv": {
34
+ "num_bytes": 7900935,
35
+ "checksum": null
36
+ },
37
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/transcript/ru/dev.tsv": {
38
+ "num_bytes": 2994516,
39
+ "checksum": null
40
+ },
41
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/transcript/ru/test.tsv": {
42
+ "num_bytes": 2930494,
43
+ "checksum": null
44
+ },
45
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/transcript/ru/other.tsv": {
46
+ "num_bytes": 3734943,
47
+ "checksum": null
48
+ },
49
+ "https://huggingface.co/datasets/reach-vb/common_voice_16_1/resolve/main/transcript/ru/invalidated.tsv": {
50
+ "num_bytes": 3034902,
51
+ "checksum": null
52
+ }
53
+ },
54
+ "download_size": 2598776157,
55
+ "features": {
56
+ "path": {
57
+ "dtype": "string",
58
+ "_type": "Value"
59
+ },
60
+ "audio": {
61
+ "sampling_rate": 16000,
62
+ "_type": "Audio"
63
+ },
64
+ "sentence": {
65
+ "dtype": "string",
66
+ "_type": "Value"
67
+ },
68
+ "condition_on_prev": {
69
+ "feature": {
70
+ "dtype": "int64",
71
+ "_type": "Value"
72
+ },
73
+ "_type": "Sequence"
74
+ },
75
+ "whisper_transcript": {
76
+ "dtype": "string",
77
+ "_type": "Value"
78
+ }
79
+ },
80
+ "homepage": "https://commonvoice.mozilla.org/en/datasets",
81
+ "license": "https://creativecommons.org/publicdomain/zero/1.0/",
82
+ "size_in_bytes": 2643190836,
83
+ "splits": {
84
+ "train": {
85
+ "name": "train",
86
+ "num_bytes": 16997252,
87
+ "num_examples": 26359,
88
+ "dataset_name": "common_voice_16_1"
89
+ },
90
+ "validation": {
91
+ "name": "validation",
92
+ "num_bytes": 6473223,
93
+ "num_examples": 10198,
94
+ "dataset_name": "common_voice_16_1"
95
+ },
96
+ "test": {
97
+ "name": "test",
98
+ "num_bytes": 6429932,
99
+ "num_examples": 10199,
100
+ "dataset_name": "common_voice_16_1"
101
+ },
102
+ "other": {
103
+ "name": "other",
104
+ "num_bytes": 7941931,
105
+ "num_examples": 12190,
106
+ "dataset_name": "common_voice_16_1"
107
+ },
108
+ "invalidated": {
109
+ "name": "invalidated",
110
+ "num_bytes": 6572341,
111
+ "num_examples": 9906,
112
+ "dataset_name": "common_voice_16_1"
113
+ }
114
+ },
115
+ "version": {
116
+ "version_str": "16.1.0",
117
+ "major": 16,
118
+ "minor": 1,
119
+ "patch": 0
120
+ }
121
+ }
common_voice_16_1_ru_pseudo_labelled/validation/state.json ADDED
@@ -0,0 +1,34 @@
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00008.arrow"
5
+ },
6
+ {
7
+ "filename": "data-00001-of-00008.arrow"
8
+ },
9
+ {
10
+ "filename": "data-00002-of-00008.arrow"
11
+ },
12
+ {
13
+ "filename": "data-00003-of-00008.arrow"
14
+ },
15
+ {
16
+ "filename": "data-00004-of-00008.arrow"
17
+ },
18
+ {
19
+ "filename": "data-00005-of-00008.arrow"
20
+ },
21
+ {
22
+ "filename": "data-00006-of-00008.arrow"
23
+ },
24
+ {
25
+ "filename": "data-00007-of-00008.arrow"
26
+ }
27
+ ],
28
+ "_fingerprint": "50f6c8363f531de4",
29
+ "_format_columns": null,
30
+ "_format_kwargs": {},
31
+ "_format_type": null,
32
+ "_output_all_columns": false,
33
+ "_split": null
34
+ }