Training in progress, step 500

Browse files

Files changed (14) hide show

.gitignore +1 -0
added_tokens.json +1 -0
config.json +107 -0
nohup.out +0 -0
preprocessor_config.json +9 -0
pytorch_model.bin +3 -0
run.sh +37 -0
run_speech_recognition_ctc.py +737 -0
runs/Feb06_10-51-19_job-aa543290-d6de-4d4d-8a32-2149b1b8e7e3/1644145311.054961/events.out.tfevents.1644145311.job-aa543290-d6de-4d4d-8a32-2149b1b8e7e3 +3 -0
runs/Feb06_10-51-19_job-aa543290-d6de-4d4d-8a32-2149b1b8e7e3/events.out.tfevents.1644145311.job-aa543290-d6de-4d4d-8a32-2149b1b8e7e3 +3 -0
special_tokens_map.json +1 -0
tokenizer_config.json +1 -0
training_args.bin +3 -0
vocab.json +1 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ checkpoint-*/

added_tokens.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"<s>": 4626, "</s>": 4627}

config.json ADDED Viewed

	@@ -0,0 +1,107 @@

+{
+  "_name_or_path": "facebook/wav2vec2-xls-r-300m",
+  "activation_dropout": 0.1,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 768,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.0,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.0,
+  "mask_feature_length": 64,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.25,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.75,
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 24,
+  "num_negatives": 100,
+  "output_hidden_size": 1024,
+  "pad_token_id": 4625,
+  "proj_codevector_dim": 768,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.17.0.dev0",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 4628,
+  "xvector_output_dim": 512
+}

nohup.out ADDED Viewed

The diff for this file is too large to render. See raw diff

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0,
+  "return_attention_mask": true,
+  "sampling_rate": 16000
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d18e150847afb5abd173e563f58448386b7d30f7ab2bdb26d38d50477c73919
+size 1280898545

run.sh ADDED Viewed

	@@ -0,0 +1,37 @@

+python run_speech_recognition_ctc.py \
+    --dataset_name="common_voice" \
+    --model_name_or_path="facebook/wav2vec2-xls-r-300m" \
+    --dataset_config_name="zh-CN" \
+    --output_dir="./" \
+    --overwrite_output_dir \
+    --num_train_epochs="100" \
+    --per_device_train_batch_size="8" \
+    --per_device_eval_batch_size="8" \
+    --gradient_accumulation_steps="4" \
+    --learning_rate="7.5e-5" \
+    --warmup_steps="2000" \
+    --length_column_name="input_length" \
+    --max_duration_in_seconds="7" \
+    --max_eval_samples="3000" \
+    --evaluation_strategy="steps" \
+    --text_column_name="sentence" \
+    --chars_to_ignore , ? . ! \- \; \: \" “ % ‘ ” � — ’ … – ！ － ： – 。 》 , ） , ？ ； ～ ~ … ︰ ， （ 」 ‧ 《 ﹔ 、 — ／ , 「 ﹖ · \
+    --save_steps="500" \
+    --eval_steps="500" \
+    --logging_steps="100" \
+    --layerdrop="0.0" \
+    --activation_dropout="0.1" \
+    --save_total_limit="3" \
+    --freeze_feature_encoder \
+    --feat_proj_dropout="0.0" \
+    --mask_time_prob="0.75" \
+    --mask_time_length="10" \
+    --mask_feature_prob="0.25" \
+    --mask_feature_length="64" \
+    --gradient_checkpointing \
+    --use_auth_token \
+    --fp16 \
+    --group_by_length \
+    --do_train --do_eval \
+    --report_to="tensorboard" \
+    --push_to_hub

run_speech_recognition_ctc.py ADDED Viewed

	@@ -0,0 +1,737 @@

+#!/usr/bin/env python
+# coding=utf-8
+# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" Fine-tuning a 🤗 Transformers CTC model for automatic speech recognition"""
+import functools
+import json
+import logging
+import os
+import re
+import sys
+import warnings
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional, Union
+import datasets
+import numpy as np
+import torch
+from datasets import DatasetDict, load_dataset, load_metric
+import transformers
+from transformers import (
+    AutoConfig,
+    AutoFeatureExtractor,
+    AutoModelForCTC,
+    AutoProcessor,
+    AutoTokenizer,
+    HfArgumentParser,
+    Trainer,
+    TrainingArguments,
+    Wav2Vec2Processor,
+    set_seed,
+)
+from transformers.trainer_utils import get_last_checkpoint, is_main_process
+from transformers.utils import check_min_version
+from transformers.utils.versions import require_version
+# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
+check_min_version("4.17.0.dev0")
+require_version("datasets>=1.13.3", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")
+logger = logging.getLogger(__name__)
+def list_field(default=None, metadata=None):
+    return field(default_factory=lambda: default, metadata=metadata)
+@dataclass
+class ModelArguments:
+    """
+    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
+    """
+    model_name_or_path: str = field(
+        metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
+    )
+    tokenizer_name_or_path: Optional[str] = field(
+        default=None,
+        metadata={"help": "Path to pretrained tokenizer or tokenizer identifier from huggingface.co/models"},
+    )
+    cache_dir: Optional[str] = field(
+        default=None,
+        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
+    )
+    freeze_feature_encoder: bool = field(
+        default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."}
+    )
+    attention_dropout: float = field(
+        default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."}
+    )
+    activation_dropout: float = field(
+        default=0.0, metadata={"help": "The dropout ratio for activations inside the fully connected layer."}
+    )
+    feat_proj_dropout: float = field(default=0.0, metadata={"help": "The dropout ratio for the projected features."})
+    hidden_dropout: float = field(
+        default=0.0,
+        metadata={
+            "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler."
+        },
+    )
+    final_dropout: float = field(
+        default=0.0,
+        metadata={"help": "The dropout probability for the final projection layer."},
+    )
+    mask_time_prob: float = field(
+        default=0.05,
+        metadata={
+            "help": "Probability of each feature vector along the time axis to be chosen as the start of the vector"
+            "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature"
+            "vectors will be masked along the time axis."
+        },
+    )
+    mask_time_length: int = field(
+        default=10,
+        metadata={"help": "Length of vector span to mask along the time axis."},
+    )
+    mask_feature_prob: float = field(
+        default=0.0,
+        metadata={
+            "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector"
+            "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis."
+        },
+    )
+    mask_feature_length: int = field(
+        default=10,
+        metadata={"help": "Length of vector span to mask along the feature axis."},
+    )
+    layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."})
+    ctc_loss_reduction: Optional[str] = field(
+        default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."}
+    )
+@dataclass
+class DataTrainingArguments:
+    """
+    Arguments pertaining to what data we are going to input our model for training and eval.
+    Using `HfArgumentParser` we can turn this class
+    into argparse arguments to be able to specify them on
+    the command line.
+    """
+    dataset_name: str = field(
+        metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
+    )
+    dataset_config_name: str = field(
+        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
+    )
+    train_split_name: str = field(
+        default="train+validation",
+        metadata={
+            "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'"
+        },
+    )
+    eval_split_name: str = field(
+        default="test",
+        metadata={
+            "help": "The name of the training data set split to use (via the datasets library). Defaults to 'test'"
+        },
+    )
+    audio_column_name: str = field(
+        default="audio",
+        metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"},
+    )
+    text_column_name: str = field(
+        default="text",
+        metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"},
+    )
+    overwrite_cache: bool = field(
+        default=False, metadata={"help": "Overwrite the cached preprocessed datasets or not."}
+    )
+    preprocessing_num_workers: Optional[int] = field(
+        default=None,
+        metadata={"help": "The number of processes to use for the preprocessing."},
+    )
+    max_train_samples: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
+            "value if set."
+        },
+    )
+    max_eval_samples: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "For debugging purposes or quicker training, truncate the number of validation examples to this "
+            "value if set."
+        },
+    )
+    chars_to_ignore: Optional[List[str]] = list_field(
+        default=None,
+        metadata={"help": "A list of characters to remove from the transcripts."},
+    )
+    eval_metrics: List[str] = list_field(
+        default=["wer", "cer"],
+        metadata={"help": "A list of metrics the model should be evaluated on. E.g. `'wer cer'`"},
+    )
+    max_duration_in_seconds: float = field(
+        default=20.0,
+        metadata={
+            "help": "Filter audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`"
+        },
+    )
+    min_duration_in_seconds: float = field(
+        default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"}
+    )
+    preprocessing_only: bool = field(
+        default=False,
+        metadata={
+            "help": "Whether to only do data preprocessing and skip training. "
+            "This is especially useful when data preprocessing errors out in distributed training due to timeout. "
+            "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` "
+            "so that the cached datasets can consequently be loaded in distributed training"
+        },
+    )
+    use_auth_token: bool = field(
+        default=False,
+        metadata={
+            "help": "If :obj:`True`, will use the token generated when running"
+            ":obj:`transformers-cli login` as HTTP bearer authorization for remote files."
+        },
+    )
+    unk_token: str = field(
+        default="[UNK]",
+        metadata={"help": "The unk token for the tokenizer"},
+    )
+    pad_token: str = field(
+        default="[PAD]",
+        metadata={"help": "The padding token for the tokenizer"},
+    )
+    word_delimiter_token: str = field(
+        default="|",
+        metadata={"help": "The word delimiter token for the tokenizer"},
+    )
+    phoneme_language: Optional[str] = field(
+        default=None,
+        metadata={
+            "help": "The target language that should be used be"
+            " passed to the tokenizer for tokenization. Note that"
+            " this is only relevant if the model classifies the"
+            " input audio to a sequence of phoneme sequences."
+        },
+    )
+@dataclass
+class DataCollatorCTCWithPadding:
+    """
+    Data collator that will dynamically pad the inputs received.
+    Args:
+        processor (:class:`~transformers.AutoProcessor`)
+            The processor used for proccessing the data.
+        padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):
+            Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
+            among:
+            * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
+              sequence if provided).
+            * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the
+              maximum acceptable input length for the model if that argument is not provided.
+            * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of
+              different lengths).
+        max_length (:obj:`int`, `optional`):
+            Maximum length of the ``input_values`` of the returned list and optionally padding length (see above).
+        max_length_labels (:obj:`int`, `optional`):
+            Maximum length of the ``labels`` returned list and optionally padding length (see above).
+        pad_to_multiple_of (:obj:`int`, `optional`):
+            If set will pad the sequence to a multiple of the provided value.
+            This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >=
+            7.5 (Volta).
+    """
+    processor: AutoProcessor
+    padding: Union[bool, str] = "longest"
+    pad_to_multiple_of: Optional[int] = None
+    pad_to_multiple_of_labels: Optional[int] = None
+    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
+        # split inputs and labels since they have to be of different lenghts and need
+        # different padding methods
+        input_features = [{"input_values": feature["input_values"]} for feature in features]
+        label_features = [{"input_ids": feature["labels"]} for feature in features]
+        batch = self.processor.pad(
+            input_features,
+            padding=self.padding,
+            pad_to_multiple_of=self.pad_to_multiple_of,
+            return_tensors="pt",
+        )
+        with self.processor.as_target_processor():
+            labels_batch = self.processor.pad(
+                label_features,
+                padding=self.padding,
+                pad_to_multiple_of=self.pad_to_multiple_of_labels,
+                return_tensors="pt",
+            )
+        # replace padding with -100 to ignore loss correctly
+        labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100)
+        batch["labels"] = labels
+        return batch
+def create_vocabulary_from_data(
+    datasets: DatasetDict,
+    word_delimiter_token: Optional[str] = None,
+    unk_token: Optional[str] = None,
+    pad_token: Optional[str] = None,
+):
+    # Given training and test labels create vocabulary
+    def extract_all_chars(batch):
+        all_text = " ".join(batch["target_text"])
+        vocab = list(set(all_text))
+        return {"vocab": [vocab], "all_text": [all_text]}
+    vocabs = datasets.map(
+        extract_all_chars,
+        batched=True,
+        batch_size=-1,
+        keep_in_memory=True,
+        remove_columns=datasets["train"].column_names,
+    )
+    # take union of all unique characters in each dataset
+    vocab_set = functools.reduce(
+        lambda vocab_1, vocab_2: set(vocab_1["vocab"][0]) | set(vocab_2["vocab"][0]), vocabs.values()
+    )
+    vocab_dict = {v: k for k, v in enumerate(sorted(list(vocab_set)))}
+    # replace white space with delimiter token
+    if word_delimiter_token is not None:
+        vocab_dict[word_delimiter_token] = vocab_dict[" "]
+        del vocab_dict[" "]
+    # add unk and pad token
+    if unk_token is not None:
+        vocab_dict[unk_token] = len(vocab_dict)
+    if pad_token is not None:
+        vocab_dict[pad_token] = len(vocab_dict)
+    return vocab_dict
+def main():
+    # See all possible arguments in src/transformers/training_args.py
+    # or by passing the --help flag to this script.
+    # We now keep distinct sets of args, for a cleaner separation of concerns.
+    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
+    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
+        # If we pass only one argument to the script and it's the path to a json file,
+        # let's parse it to get our arguments.
+        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
+    else:
+        model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+    # Detecting last checkpoint.
+    last_checkpoint = None
+    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
+        last_checkpoint = get_last_checkpoint(training_args.output_dir)
+        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
+            raise ValueError(
+                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
+                "Use --overwrite_output_dir to overcome."
+            )
+        elif last_checkpoint is not None:
+            logger.info(
+                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
+                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
+            )
+    # Setup logging
+    logging.basicConfig(
+        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+        datefmt="%m/%d/%Y %H:%M:%S",
+        handlers=[logging.StreamHandler(sys.stdout)],
+    )
+    logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
+    # Log on each process the small summary:
+    logger.warning(
+        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
+        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
+    )
+    # Set the verbosity to info of the Transformers logger (on main process only):
+    if is_main_process(training_args.local_rank):
+        transformers.utils.logging.set_verbosity_info()
+    logger.info("Training/evaluation parameters %s", training_args)
+    # Set seed before initializing model.
+    set_seed(training_args.seed)
+    # 1. First, let's load the dataset
+    raw_datasets = DatasetDict()
+    if training_args.do_train:
+        raw_datasets["train"] = load_dataset(
+            data_args.dataset_name,
+            data_args.dataset_config_name,
+            split=data_args.train_split_name,
+            use_auth_token=data_args.use_auth_token,
+        )
+        if data_args.audio_column_name not in raw_datasets["train"].column_names:
+            raise ValueError(
+                f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
+                "Make sure to set `--audio_column_name` to the correct audio column - one of "
+                f"{', '.join(raw_datasets['train'].column_names)}."
+            )
+        if data_args.text_column_name not in raw_datasets["train"].column_names:
+            raise ValueError(
+                f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
+                "Make sure to set `--text_column_name` to the correct text column - one of "
+                f"{', '.join(raw_datasets['train'].column_names)}."
+            )
+        if data_args.max_train_samples is not None:
+            raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples))
+    if training_args.do_eval:
+        raw_datasets["eval"] = load_dataset(
+            data_args.dataset_name,
+            data_args.dataset_config_name,
+            split=data_args.eval_split_name,
+            use_auth_token=data_args.use_auth_token,
+        )
+        if data_args.max_eval_samples is not None:
+            raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples))
+    # 2. We remove some special characters from the datasets
+    # that make training complicated and do not help in transcribing the speech
+    # E.g. characters, such as `,` and `.` do not really have an acoustic characteristic
+    # that could be easily picked up by the model
+    chars_to_ignore_regex = (
+        f'[{"".join(data_args.chars_to_ignore)}]' if data_args.chars_to_ignore is not None else None
+    )
+    text_column_name = data_args.text_column_name
+    def remove_special_characters(batch):
+        if chars_to_ignore_regex is not None:
+            batch["target_text"] = re.sub(chars_to_ignore_regex, "", batch[text_column_name]).lower() + " "
+        else:
+            batch["target_text"] = batch[text_column_name].lower() + " "
+        return batch
+    with training_args.main_process_first(desc="dataset map special characters removal"):
+        raw_datasets = raw_datasets.map(
+            remove_special_characters,
+            remove_columns=[text_column_name],
+            desc="remove special characters from datasets",
+        )
+    # save special tokens for tokenizer
+    word_delimiter_token = data_args.word_delimiter_token
+    unk_token = data_args.unk_token
+    pad_token = data_args.pad_token
+    # 3. Next, let's load the config as we might need it to create
+    # the tokenizer
+    # load config
+    config = AutoConfig.from_pretrained(
+        model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token
+    )
+    # 4. Next, if no tokenizer file is defined,
+    # we create the vocabulary of the model by extracting all unique characters from
+    # the training and evaluation datasets
+    # We need to make sure that only first rank saves vocabulary
+    # make sure all processes wait until vocab is created
+    tokenizer_name_or_path = model_args.tokenizer_name_or_path
+    tokenizer_kwargs = {}
+    if tokenizer_name_or_path is None:
+        # save vocab in training output dir
+        tokenizer_name_or_path = training_args.output_dir
+        vocab_file = os.path.join(tokenizer_name_or_path, "vocab.json")
+        with training_args.main_process_first():
+            if training_args.overwrite_output_dir and os.path.isfile(vocab_file):
+                os.remove(vocab_file)
+        with training_args.main_process_first(desc="dataset map vocabulary creation"):
+            if not os.path.isfile(vocab_file):
+                os.makedirs(tokenizer_name_or_path, exist_ok=True)
+                vocab_dict = create_vocabulary_from_data(
+                    raw_datasets,
+                    word_delimiter_token=word_delimiter_token,
+                    unk_token=unk_token,
+                    pad_token=pad_token,
+                )
+                # save vocab dict to be loaded into tokenizer
+                with open(vocab_file, "w") as file:
+                    json.dump(vocab_dict, file)
+        # if tokenizer has just been created
+        # it is defined by `tokenizer_class` if present in config else by `model_type`
+        tokenizer_kwargs = {
+            "config": config if config.tokenizer_class is not None else None,
+            "tokenizer_type": config.model_type if config.tokenizer_class is None else None,
+            "unk_token": unk_token,
+            "pad_token": pad_token,
+            "word_delimiter_token": word_delimiter_token,
+        }
+    # 5. Now we can instantiate the feature extractor, tokenizer and model
+    # Note for distributed training, the .from_pretrained methods guarantee that only
+    # one local process can concurrently download model & vocab.
+    # load feature_extractor and tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(
+        tokenizer_name_or_path,
+        use_auth_token=data_args.use_auth_token,
+        **tokenizer_kwargs,
+    )
+    feature_extractor = AutoFeatureExtractor.from_pretrained(
+        model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token
+    )
+    # adapt config
+    config.update(
+        {
+            "feat_proj_dropout": model_args.feat_proj_dropout,
+            "attention_dropout": model_args.attention_dropout,
+            "hidden_dropout": model_args.hidden_dropout,
+            "final_dropout": model_args.final_dropout,
+            "mask_time_prob": model_args.mask_time_prob,
+            "mask_time_length": model_args.mask_time_length,
+            "mask_feature_prob": model_args.mask_feature_prob,
+            "mask_feature_length": model_args.mask_feature_length,
+            "gradient_checkpointing": training_args.gradient_checkpointing,
+            "layerdrop": model_args.layerdrop,
+            "ctc_loss_reduction": model_args.ctc_loss_reduction,
+            "pad_token_id": tokenizer.pad_token_id,
+            "vocab_size": len(tokenizer),
+            "activation_dropout": model_args.activation_dropout,
+        }
+    )
+    # create model
+    model = AutoModelForCTC.from_pretrained(
+        model_args.model_name_or_path,
+        cache_dir=model_args.cache_dir,
+        config=config,
+        use_auth_token=data_args.use_auth_token,
+    )
+    # freeze encoder
+    if model_args.freeze_feature_encoder:
+        model.freeze_feature_encoder()
+    # 6. Now we preprocess the datasets including loading the audio, resampling and normalization
+    # Thankfully, `datasets` takes care of automatically loading and resampling the audio,
+    # so that we just need to set the correct target sampling rate and normalize the input
+    # via the `feature_extractor`
+    # make sure that dataset decodes audio with correct sampling rate
+    dataset_sampling_rate = next(iter(raw_datasets.values())).features[data_args.audio_column_name].sampling_rate
+    if dataset_sampling_rate != feature_extractor.sampling_rate:
+        raw_datasets = raw_datasets.cast_column(
+            data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate)
+        )
+    # derive max & min input length for sample rate & max duration
+    max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate
+    min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate
+    audio_column_name = data_args.audio_column_name
+    num_workers = data_args.preprocessing_num_workers
+    # `phoneme_language` is only relevant if the model is fine-tuned on phoneme classification
+    phoneme_language = data_args.phoneme_language
+    # Preprocessing the datasets.
+    # We need to read the audio files as arrays and tokenize the targets.
+    def prepare_dataset(batch):
+        # load audio
+        sample = batch[audio_column_name]
+        inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"])
+        batch["input_values"] = inputs.input_values[0]
+        batch["input_length"] = len(batch["input_values"])
+        # encode targets
+        additional_kwargs = {}
+        if phoneme_language is not None:
+            additional_kwargs["phonemizer_lang"] = phoneme_language
+        batch["labels"] = tokenizer(batch["target_text"], **additional_kwargs).input_ids
+        return batch
+    with training_args.main_process_first(desc="dataset map preprocessing"):
+        vectorized_datasets = raw_datasets.map(
+            prepare_dataset,
+            remove_columns=next(iter(raw_datasets.values())).column_names,
+            num_proc=num_workers,
+            desc="preprocess datasets",
+        )
+        def is_audio_in_length_range(length):
+            return length > min_input_length and length < max_input_length
+        # filter data that is shorter than min_input_length
+        vectorized_datasets = vectorized_datasets.filter(
+            is_audio_in_length_range,
+            num_proc=num_workers,
+            input_columns=["input_length"],
+        )
+    # 7. Next, we can prepare the training.
+    # Let's use word error rate (WER) as our evaluation metric,
+    # instantiate a data collator and the trainer
+    # Define evaluation metrics during training, *i.e.* word error rate, character error rate
+    eval_metrics = {metric: load_metric(metric) for metric in data_args.eval_metrics}
+    # for large datasets it is advised to run the preprocessing on a
+    # single machine first with ``args.preprocessing_only`` since there will mostly likely
+    # be a timeout when running the script in distributed mode.
+    # In a second step ``args.preprocessing_only`` can then be set to `False` to load the
+    # cached dataset
+    if data_args.preprocessing_only:
+        logger.info(f"Data preprocessing finished. Files cached at {vectorized_datasets.cache_files}")
+        return
+    def compute_metrics(pred):
+        pred_logits = pred.predictions
+        pred_ids = np.argmax(pred_logits, axis=-1)
+        pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id
+        pred_str = tokenizer.batch_decode(pred_ids)
+        # we do not want to group tokens when computing the metrics
+        label_str = tokenizer.batch_decode(pred.label_ids, group_tokens=False)
+        metrics = {k: v.compute(predictions=pred_str, references=label_str) for k, v in eval_metrics.items()}
+        return metrics
+    # Now save everything to be able to create a single processor later
+    if is_main_process(training_args.local_rank):
+        # save feature extractor, tokenizer and config
+        feature_extractor.save_pretrained(training_args.output_dir)
+        tokenizer.save_pretrained(training_args.output_dir)
+        config.save_pretrained(training_args.output_dir)
+    try:
+        processor = AutoProcessor.from_pretrained(training_args.output_dir)
+    except (OSError, KeyError):
+        warnings.warn(
+            "Loading a processor from a feature extractor config that does not"
+            " include a `processor_class` attribute is deprecated and will be removed in v5. Please add the following "
+            " attribute to your `preprocessor_config.json` file to suppress this warning: "
+            " `'processor_class': 'Wav2Vec2Processor'`",
+            FutureWarning,
+        )
+        processor = Wav2Vec2Processor.from_pretrained(training_args.output_dir)
+    # Instantiate custom data collator
+    data_collator = DataCollatorCTCWithPadding(processor=processor)
+    # Initialize Trainer
+    trainer = Trainer(
+        model=model,
+        data_collator=data_collator,
+        args=training_args,
+        compute_metrics=compute_metrics,
+        train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
+        eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
+        tokenizer=feature_extractor,
+    )
+    # 8. Finally, we can start training
+    # Training
+    if training_args.do_train:
+        # use last checkpoint if it exists
+        if last_checkpoint is not None:
+            checkpoint = last_checkpoint
+        elif os.path.isdir(model_args.model_name_or_path):
+            checkpoint = model_args.model_name_or_path
+        else:
+            checkpoint = None
+        train_result = trainer.train(resume_from_checkpoint=checkpoint)
+        trainer.save_model()
+        metrics = train_result.metrics
+        max_train_samples = (
+            data_args.max_train_samples
+            if data_args.max_train_samples is not None
+            else len(vectorized_datasets["train"])
+        )
+        metrics["train_samples"] = min(max_train_samples, len(vectorized_datasets["train"]))
+        trainer.log_metrics("train", metrics)
+        trainer.save_metrics("train", metrics)
+        trainer.save_state()
+    # Evaluation
+    results = {}
+    if training_args.do_eval:
+        logger.info("*** Evaluate ***")
+        metrics = trainer.evaluate()
+        max_eval_samples = (
+            data_args.max_eval_samples if data_args.max_eval_samples is not None else len(vectorized_datasets["eval"])
+        )
+        metrics["eval_samples"] = min(max_eval_samples, len(vectorized_datasets["eval"]))
+        trainer.log_metrics("eval", metrics)
+        trainer.save_metrics("eval", metrics)
+    # Write model card and (optionally) push to hub
+    config_name = data_args.dataset_config_name if data_args.dataset_config_name is not None else "na"
+    kwargs = {
+        "finetuned_from": model_args.model_name_or_path,
+        "tasks": "speech-recognition",
+        "tags": ["automatic-speech-recognition", data_args.dataset_name],
+        "dataset_args": f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split: {data_args.eval_split_name}",
+        "dataset": f"{data_args.dataset_name.upper()} - {config_name.upper()}",
+    }
+    if "common_voice" in data_args.dataset_name:
+        kwargs["language"] = config_name
+    if training_args.push_to_hub:
+        trainer.push_to_hub(**kwargs)
+    else:
+        trainer.create_model_card(**kwargs)
+    return results
+if __name__ == "__main__":  # run main() only when executed as a script, not when imported
+    main()

runs/Feb06_10-51-19_job-aa543290-d6de-4d4d-8a32-2149b1b8e7e3/1644145311.054961/events.out.tfevents.1644145311.job-aa543290-d6de-4d4d-8a32-2149b1b8e7e3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c678ecf607bcffe679f2e2c5208387e1199c8b9a6c5a0653c78ffe8f38212b7
+size 4564

runs/Feb06_10-51-19_job-aa543290-d6de-4d4d-8a32-2149b1b8e7e3/events.out.tfevents.1644145311.job-aa543290-d6de-4d4d-8a32-2149b1b8e7e3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb2ee8a3c179e8381c3b4cab88c16fd18666a193c9839de1d36c0b177f234f84
+size 5469

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5b087e508c8c4a08718cd86b1845f3e1856b2be3b5a60e63bafc1975044b307e
+size 2991

vocab.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"c": 1, "×": 2, "̃": 3, "̌": 4, "ε": 5, "λ": 6, "μ": 7, "и": 8, "т": 9, "─": 10, "□": 11, "〈": 12, "〉": 13, "『": 14, "』": 15, "ア": 16, "オ": 17, "カ": 18, "チ": 19, "ド": 20, "ベ": 21, "ャ": 22, "ヤ": 23, "ン": 24, "・": 25, "ー": 26, "ㄟ": 27, "䲟": 28, "䴓": 29, "䴕": 30, "一": 31, "丁": 32, "七": 33, "万": 34, "丈": 35, "三": 36, "上": 37, "下": 38, "不": 39, "与": 40, "丐": 41, "丑": 42, "专": 43, "且": 44, "丕": 45, "世": 46, "丘": 47, "丙": 48, "业": 49, "丛": 50, "东": 51, "丝": 52, "丞": 53, "丟": 54, "丢": 55, "两": 56, "严": 57, "丧": 58, "个": 59, "丫": 60, "中": 61, "丰": 62, "串": 63, "临": 64, "丸": 65, "丹": 66, "为": 67, "主": 68, "丽": 69, "举": 70, "乂": 71, "乃": 72, "久": 73, "么": 74, "义": 75, "之": 76, "乌": 77, "乍": 78, "乎": 79, "乏": 80, "乐": 81, "乒": 82, "乓": 83, "乔": 84, "乖": 85, "乘": 86, "乙": 87, "九": 88, "乞": 89, "也": 90, "习": 91, "乡": 92, "书": 93, "买": 94, "乱": 95, "乳": 96, "乾": 97, "了": 98, "予": 99, "争": 100, "事": 101, "二": 102, "于": 103, "亏": 104, "云": 105, "互": 106, "五": 107, "井": 108, "亚": 109, "些": 110, "亡": 111, "交": 112, "亥": 113, "亦": 114, "产": 115, "亨": 116, "亩": 117, "享": 118, "京": 119, "亭": 120, "亮": 121, "亲": 122, "亳": 123, "人": 124, "亿": 125, "什": 126, "仁": 127, "仄": 128, "仅": 129, "仆": 130, "仇": 131, "今": 132, "介": 133, "仍": 134, "从": 135, "仑": 136, "仓": 137, "仔": 138, "仕": 139, "他": 140, "仗": 141, "付": 142, "仙": 143, "仞": 144, "代": 145, "令": 146, "以": 147, "仪": 148, "们": 149, "仰": 150, "仲": 151, "件": 152, "价": 153, "任": 154, "份": 155, "仿": 156, "企": 157, "伉": 158, "伊": 159, "伍": 160, "伎": 161, "伏": 162, "伐": 163, "休": 164, "众": 165, "优": 166, "伙": 167, "会": 168, "伛": 169, "伞": 170, "伟": 171, "传": 172, "伤": 173, "伦": 174, "伪": 175, "伫": 176, "伯": 177, "估": 178, "伴": 179, "伸": 180, "伺": 181, "似": 182, "伽": 183, "佃": 184, "但": 185, "位": 186, "低": 187, "住": 188, "佐": 189, "佑": 190, "体": 191, "何": 192, "佗": 193, "佘": 194, "余": 195, "佚": 196, "佛": 197, "作": 198, "佟": 199, "你": 200, "佣": 201, "佤": 202, "佥": 203, "佩": 204, "佬": 205, "佰": 206, "佳": 207, "使": 208, "侂": 209, "侃": 210, "侄": 
211, "侈": 212, "例": 213, "侍": 214, "侏": 215, "侗": 216, "供": 217, "依": 218, "侠": 219, "侣": 220, "侦": 221, "侧": 222, "侨": 223, "侪": 224, "侬": 225, "侮": 226, "侯": 227, "侴": 228, "侵": 229, "便": 230, "促": 231, "俄": 232, "俊": 233, "俐": 234, "俗": 235, "俘": 236, "保": 237, "俞": 238, "信": 239, "俣": 240, "俨": 241, "俩": 242, "俭": 243, "修": 244, "俯": 245, "俱": 246, "俳": 247, "俵": 248, "俸": 249, "倍": 250, "倒": 251, "倘": 252, "候": 253, "倚": 254, "借": 255, "倡": 256, "倦": 257, "倪": 258, "倭": 259, "债": 260, "倻": 261, "值": 262, "倾": 263, "偃": 264, "假": 265, "偈": 266, "偏": 267, "偕": 268, "做": 269, "停": 270, "健": 271, "偰": 272, "偲": 273, "偶": 274, "偷": 275, "偿": 276, "傅": 277, "傍": 278, "傕": 279, "傣": 280, "储": 281, "催": 282, "傲": 283, "傻": 284, "像": 285, "僖": 286, "僚": 287, "僧": 288, "僵": 289, "儆": 290, "儋": 291, "儒": 292, "儿": 293, "兀": 294, "允": 295, "元": 296, "兄": 297, "充": 298, "兆": 299, "先": 300, "光": 301, "克": 302, "免": 303, "兑": 304, "兔": 305, "兖": 306, "党": 307, "兜": 308, "入": 309, "全": 310, "八": 311, "公": 312, "六": 313, "兮": 314, "兰": 315, "共": 316, "关": 317, "兴": 318, "兵": 319, "其": 320, "具": 321, "典": 322, "兹": 323, "养": 324, "兼": 325, "兽": 326, "冀": 327, "内": 328, "冈": 329, "冉": 330, "册": 331, "再": 332, "冒": 333, "冕": 334, "冗": 335, "写": 336, "军": 337, "农": 338, "冠": 339, "冢": 340, "冤": 341, "冥": 342, "冬": 343, "冯": 344, "冰": 345, "冱": 346, "冲": 347, "决": 348, "况": 349, "冶": 350, "冷": 351, "冻": 352, "冼": 353, "净": 354, "凃": 355, "凄": 356, "准": 357, "凉": 358, "凋": 359, "凌": 360, "减": 361, "凑": 362, "凝": 363, "几": 364, "凡": 365, "凤": 366, "凭": 367, "凯": 368, "凰": 369, "凳": 370, "凶": 371, "凸": 372, "凹": 373, "出": 374, "击": 375, "函": 376, "凿": 377, "刀": 378, "刃": 379, "分": 380, "切": 381, "刈": 382, "刊": 383, "刍": 384, "刑": 385, "划": 386, "列": 387, "刘": 388, "则": 389, "刚": 390, "创": 391, "初": 392, "删": 393, "判": 394, "別": 395, "利": 396, "别": 397, "刮": 398, "到": 399, "制": 400, "刷": 401, "券": 402, "刹": 403, "刺": 404, "刻": 405, "剀": 406, "剂": 407, "削": 408, "剌": 409, "前": 410, "剑": 
411, "剖": 412, "剥": 413, "剧": 414, "剩": 415, "剪": 416, "副": 417, "割": 418, "剿": 419, "劈": 420, "力": 421, "劝": 422, "办": 423, "功": 424, "加": 425, "务": 426, "劣": 427, "动": 428, "助": 429, "努": 430, "劫": 431, "劭": 432, "励": 433, "劲": 434, "劳": 435, "��": 436, "势": 437, "勃": 438, "勇": 439, "勉": 440, "勋": 441, "勍": 442, "勐": 443, "勒": 444, "勖": 445, "勘": 446, "募": 447, "勤": 448, "勺": 449, "勾": 450, "勿": 451, "匀": 452, "包": 453, "匆": 454, "匈": 455, "匍": 456, "匐": 457, "匕": 458, "化": 459, "北": 460, "匙": 461, "匝": 462, "匠": 463, "匡": 464, "匣": 465, "匪": 466, "匮": 467, "匹": 468, "区": 469, "医": 470, "匾": 471, "匿": 472, "十": 473, "千": 474, "升": 475, "午": 476, "卉": 477, "半": 478, "华": 479, "协": 480, "卑": 481, "卒": 482, "卓": 483, "单": 484, "卖": 485, "南": 486, "博": 487, "卜": 488, "卞": 489, "占": 490, "卡": 491, "卢": 492, "卤": 493, "卦": 494, "卧": 495, "卫": 496, "卯": 497, "印": 498, "危": 499, "即": 500, "却": 501, "卵": 502, "卷": 503, "卸": 504, "卿": 505, "厂": 506, "厄": 507, "厅": 508, "历": 509, "厉": 510, "压": 511, "厌": 512, "厍": 513, "厕": 514, "厘": 515, "厚": 516, "厝": 517, "原": 518, "厢": 519, "厥": 520, "厦": 521, "厨": 522, "厩": 523, "厮": 524, "去": 525, "县": 526, "参": 527, "叅": 528, "又": 529, "叉": 530, "及": 531, "友": 532, "双": 533, "反": 534, "发": 535, "叔": 536, "取": 537, "受": 538, "变": 539, "叙": 540, "叛": 541, "叟": 542, "叠": 543, "口": 544, "古": 545, "句": 546, "另": 547, "叩": 548, "只": 549, "叫": 550, "召": 551, "叭": 552, "可": 553, "台": 554, "史": 555, "右": 556, "叶": 557, "号": 558, "司": 559, "叹": 560, "叻": 561, "叼": 562, "吁": 563, "吃": 564, "各": 565, "合": 566, "吉": 567, "吊": 568, "同": 569, "名": 570, "后": 571, "吏": 572, "吐": 573, "向": 574, "吓": 575, "吕": 576, "吖": 577, "吗": 578, "君": 579, "吞": 580, "吟": 581, "吠": 582, "否": 583, "吧": 584, "吨": 585, "含": 586, "听": 587, "启": 588, "吴": 589, "吵": 590, "吸": 591, "吹": 592, "吻": 593, "吼": 594, "吾": 595, "呀": 596, "呆": 597, "呈": 598, "告": 599, "呋": 600, "呐": 601, "呔": 602, "呗": 603, "员": 604, "呢": 605, "呤": 606, "周": 607, "味": 608, "呵": 609, "呻": 610, 
"呼": 611, "命": 612, "咀": 613, "和": 614, "咎": 615, "咏": 616, "咒": 617, "咕": 618, "咖": 619, "咝": 620, "咨": 621, "咪": 622, "咬": 623, "咳": 624, "咸": 625, "咽": 626, "哀": 627, "品": 628, "哄": 629, "哇": 630, "哈": 631, "哉": 632, "响": 633, "哑": 634, "哔": 635, "哥": 636, "哨": 637, "哩": 638, "哪": 639, "哭": 640, "哮": 641, "哲": 642, "哺": 643, "哼": 644, "唁": 645, "唆": 646, "唇": 647, "唐": 648, "唑": 649, "唔": 650, "唤": 651, "唬": 652, "售": 653, "唯": 654, "唱": 655, "唸": 656, "唾": 657, "啄": 658, "商": 659, "啉": 660, "啊": 661, "啡": 662, "啤": 663, "啥": 664, "啦": 665, "啧": 666, "啰": 667, "啶": 668, "啸": 669, "喀": 670, "喃": 671, "善": 672, "喇": 673, "喉": 674, "喊": 675, "喔": 676, "喘": 677, "喙": 678, "喜": 679, "喝": 680, "喧": 681, "喱": 682, "喵": 683, "喷": 684, "喹": 685, "喻": 686, "喾": 687, "嗅": 688, "嗓": 689, "嗜": 690, "嗣": 691, "嗽": 692, "嘈": 693, "嘉": 694, "嘌": 695, "嘎": 696, "嘘": 697, "嘛": 698, "嘟": 699, "嘧": 700, "嘲": 701, "嘴": 702, "嘻": 703, "噌": 704, "噜": 705, "器": 706, "噩": 707, "噪": 708, "噬": 709, "噶": 710, "嚏": 711, "嚓": 712, "嚣": 713, "囊": 714, "囚": 715, "四": 716, "回": 717, "因": 718, "团": 719, "园": 720, "困": 721, "围": 722, "固": 723, "国": 724, "图": 725, "圃": 726, "圆": 727, "圈": 728, "圉": 729, "圜": 730, "土": 731, "圣": 732, "在": 733, "圩": 734, "圪": 735, "圭": 736, "地": 737, "圳": 738, "圹": 739, "场": 740, "圻": 741, "圾": 742, "址": 743, "坂": 744, "均": 745, "坊": 746, "坍": 747, "坎": 748, "坏": 749, "坐": 750, "坑": 751, "块": 752, "坚": 753, "坛": 754, "坜": 755, "坝": 756, "坞": 757, "坟": 758, "坠": 759, "坡": 760, "坤": 761, "坦": 762, "坨": 763, "坪": 764, "坳": 765, "坻": 766, "垂": 767, "垃": 768, "垄": 769, "型": 770, "垌": 771, "垒": 772, "垢": 773, "垣": 774, "垦": 775, "垩": 776, "垫": 777, "埃": 778, "埇": 779, "埈": 780, "埋": 781, "城": 782, "埔": 783, "埕": 784, "埗": 785, "域": 786, "埠": 787, "培": 788, "基": 789, "堂": 790, "堆": 791, "堇": 792, "堍": 793, "堎": 794, "堕": 795, "堡": 796, "堤": 797, "堪": 798, "堰": 799, "堵": 800, "堺": 801, "塌": 802, "塑": 803, "塔": 804, "塘": 805, "塞": 806, "填": 807, "塬": 808, "塾": 809, "墀": 810, 
"境": 811, "墅": 812, "墓": 813, "墙": 814, "增": 815, "墟": 816, "墨": 817, "墩": 818, "壁": 819, "壕": 820, "壤": 821, "士": 822, "壬": 823, "壮": 824, "声": 825, "壳": 826, "壶": 827, "壸": 828, "壹": 829, "壽": 830, "处": 831, "备": 832, "复": 833, "夏": 834, "夕": 835, "外": 836, "夙": 837, "多": 838, "夜": 839, "够": 840, "夥": 841, "大": 842, "天": 843, "太": 844, "夫": 845, "夭": 846, "央": 847, "失": 848, "头": 849, "夷": 850, "夸": 851, "夹": 852, "夺": 853, "奂": 854, "奄": 855, "奇": 856, "奈": 857, "奉": 858, "奋": 859, "奎": 860, "奏": 861, "契": 862, "奔": 863, "奕": 864, "奖": 865, "套": 866, "奘": 867, "奚": 868, "奠": 869, "奢": 870, "奣": 871, "奥": 872, "女": 873, "奴": 874, "奶": 875, "奸": 876, "她": 877, "好": 878, "如": 879, "妃": 880, "妄": 881, "妆": 882, "妇": 883, "妈": 884, "妊": 885, "妍": 886, "妒": 887, "妓": 888, "妖": 889, "妙": 890, "妡": 891, "妤": 892, "妥": 893, "妨": 894, "妫": 895, "妮": 896, "妲": 897, "妳": 898, "妹": 899, "妻": 900, "妾": 901, "姆": 902, "姊": 903, "始": 904, "姐": 905, "姑": 906, "姒": 907, "姓": 908, "委": 909, "姚": 910, "姜": 911, "姝": 912, "姨": 913, "姬": 914, "姮": 915, "姻": 916, "姿": 917, "威": 918, "娃": 919, "娄": 920, "娅": 921, "娆": 922, "娇": 923, "娘": 924, "娜": 925, "娟": 926, "娠": 927, "娣": 928, "娥": 929, "娱": 930, "娴": 931, "娶": 932, "娼": 933, "婆": 934, "婉": 935, "婕": 936, "婚": 937, "婢": 938, "婪": 939, "婴": 940, "婵": 941, "婷": 942, "婺": 943, "婿": 944, "媒": 945, "媛": 946, "媲": 947, "媳": 948, "嫁": 949, "嫉": 950, "嫌": 951, "嫔": 952, "嫖": 953, "嫡": 954, "嫣": 955, "嫩": 956, "嬤": 957, "嬴": 958, "嬷": 959, "孀": 960, "子": 961, "孔": 962, "孕": 963, "孖": 964, "字": 965, "存": 966, "孙": 967, "孚": 968, "孛": 969, "孜": 970, "孝": 971, "孟": 972, "孢": 973, "季": 974, "孤": 975, "学": 976, "孩": 977, "孪": 978, "孵": 979, "孺": 980, "宁": 981, "它": 982, "宅": 983, "宇": 984, "守": 985, "安": 986, "宋": 987, "完": 988, "宏": 989, "宕": 990, "宗": 991, "官": 992, "宙": 993, "定": 994, "宛": 995, "宜": 996, "宝": 997, "实": 998, "宠": 999, "审": 1000, "客": 1001, "宣": 1002, "室": 1003, "宥": 1004, "宦": 1005, "宪": 1006, "宫": 1007, "宰": 1008, "害": 1009, 
"宴": 1010, "家": 1011, "宸": 1012, "容": 1013, "宽": 1014, "宾": 1015, "宿": 1016, "寀": 1017, "寂": 1018, "寄": 1019, "寅": 1020, "密": 1021, "寇": 1022, "富": 1023, "寒": 1024, "寓": 1025, "寝": 1026, "寞": 1027, "察": 1028, "寡": 1029, "寨": 1030, "寮": 1031, "寰": 1032, "寸": 1033, "对": 1034, "寺": 1035, "寻": 1036, "导": 1037, "寿": 1038, "封": 1039, "専": 1040, "射": 1041, "将": 1042, "尉": 1043, "尊": 1044, "小": 1045, "少": 1046, "尔": 1047, "尕": 1048, "尖": 1049, "尘": 1050, "尚": 1051, "尝": 1052, "尤": 1053, "尧": 1054, "尨": 1055, "就": 1056, "尸": 1057, "尹": 1058, "尺": 1059, "尻": 1060, "尼": 1061, "尽": 1062, "尾": 1063, "尿": 1064, "局": 1065, "层": 1066, "居": 1067, "屈": 1068, "届": 1069, "屋": 1070, "屎": 1071, "屏": 1072, "屐": 1073, "屑": 1074, "展": 1075, "属": 1076, "屠": 1077, "屡": 1078, "履": 1079, "屯": 1080, "山": 1081, "屹": 1082, "屿": 1083, "岁": 1084, "岂": 1085, "岈": 1086, "岐": 1087, "岑": 1088, "岔": 1089, "岗": 1090, "岚": 1091, "岛": 1092, "岩": 1093, "岫": 1094, "岬": 1095, "岭": 1096, "岱": 1097, "岳": 1098, "岷": 1099, "岸": 1100, "峄": 1101, "峒": 1102, "峙": 1103, "峡": 1104, "峤": 1105, "峥": 1106, "峨": 1107, "峩": 1108, "峪": 1109, "峭": 1110, "峯": 1111, "峰": 1112, "峻": 1113, "崁": 1114, "崂": 1115, "崇": 1116, "崎": 1117, "崔": 1118, "崖": 1119, "崛": 1120, "崞": 1121, "崧": 1122, "崩": 1123, "崭": 1124, "崴": 1125, "嵋": 1126, "嵌": 1127, "嵖": 1128, "嵗": 1129, "嵩": 1130, "嵯": 1131, "嵴": 1132, "嶷": 1133, "巅": 1134, "川": 1135, "州": 1136, "巡": 1137, "巢": 1138, "工": 1139, "左": 1140, "巧": 1141, "巨": 1142, "巩": 1143, "巫": 1144, "差": 1145, "巯": 1146, "己": 1147, "已": 1148, "巳": 1149, "巴": 1150, "巷": 1151, "巽": 1152, "巾": 1153, "币": 1154, "市": 1155, "布": 1156, "帅": 1157, "帆": 1158, "师": 1159, "希": 1160, "帐": 1161, "帕": 1162, "帖": 1163, "帘": 1164, "帚": 1165, "帛": 1166, "帜": 1167, "帝": 1168, "带": 1169, "帧": 1170, "席": 1171, "帮": 1172, "帷": 1173, "常": 1174, "帽": 1175, "幂": 1176, "幅": 1177, "幌": 1178, "幔": 1179, "幕": 1180, "幡": 1181, "幢": 1182, "干": 1183, "平": 1184, "年": 1185, "并": 1186, "幸": 1187, "幻": 1188, "幼": 1189, "幽": 1190, "广": 
1191, "庄": 1192, "庆": 1193, "庇": 1194, "床": 1195, "序": 1196, "庐": 1197, "库": 1198, "应": 1199, "底": 1200, "店": 1201, "庙": 1202, "庚": 1203, "府": 1204, "庞": 1205, "废": 1206, "度": 1207, "座": 1208, "庭": 1209, "庵": 1210, "庶": 1211, "康": 1212, "庸": 1213, "庹": 1214, "庾": 1215, "廆": 1216, "廉": 1217, "廊": 1218, "廓": 1219, "廖": 1220, "廪": 1221, "延": 1222, "廷": 1223, "建": 1224, "廿": 1225, "开": 1226, "异": 1227, "弃": 1228, "弄": 1229, "弈": 1230, "弊": 1231, "式": 1232, "弓": 1233, "引": 1234, "弗": 1235, "弘": 1236, "弟": 1237, "张": 1238, "弢": 1239, "弥": 1240, "弦": 1241, "弧": 1242, "弯": 1243, "弱": 1244, "弹": 1245, "强": 1246, "弼": 1247, "归": 1248, "当": 1249, "录": 1250, "彗": 1251, "彝": 1252, "形": 1253, "彤": 1254, "彦": 1255, "彧": 1256, "彩": 1257, "彪": 1258, "彬": 1259, "彭": 1260, "彰": 1261, "影": 1262, "役": 1263, "彻": 1264, "彼": 1265, "往": 1266, "��": 1267, "径": 1268, "待": 1269, "徇": 1270, "很": 1271, "徊": 1272, "律": 1273, "徐": 1274, "徒": 1275, "得": 1276, "徘": 1277, "徙": 1278, "御": 1279, "循": 1280, "微": 1281, "德": 1282, "徽": 1283, "心": 1284, "必": 1285, "忆": 1286, "忌": 1287, "忍": 1288, "忒": 1289, "志": 1290, "忘": 1291, "忙": 1292, "忠": 1293, "忤": 1294, "忧": 1295, "快": 1296, "忱": 1297, "念": 1298, "忻": 1299, "忽": 1300, "怀": 1301, "态": 1302, "怎": 1303, "怒": 1304, "怕": 1305, "怖": 1306, "怜": 1307, "思": 1308, "怡": 1309, "急": 1310, "性": 1311, "怨": 1312, "怪": 1313, "怵": 1314, "总": 1315, "恂": 1316, "恋": 1317, "恐": 1318, "恒": 1319, "恕": 1320, "恢": 1321, "恨": 1322, "恩": 1323, "恪": 1324, "恬": 1325, "恭": 1326, "息": 1327, "恰": 1328, "恶": 1329, "恺": 1330, "恼": 1331, "恽": 1332, "悄": 1333, "悉": 1334, "悌": 1335, "悍": 1336, "悔": 1337, "悖": 1338, "悚": 1339, "悟": 1340, "悠": 1341, "患": 1342, "悦": 1343, "您": 1344, "悫": 1345, "悬": 1346, "悲": 1347, "悼": 1348, "情": 1349, "惊": 1350, "惑": 1351, "惕": 1352, "惘": 1353, "惜": 1354, "惟": 1355, "惠": 1356, "惧": 1357, "惨": 1358, "惩": 1359, "惪": 1360, "惬": 1361, "惭": 1362, "惯": 1363, "惰": 1364, "想": 1365, "惹": 1366, "惺": 1367, "愁": 1368, "愈": 1369, "愉": 1370, "意": 1371, "愔": 1372, 
"愕": 1373, "愚": 1374, "感": 1375, "愤": 1376, "愧": 1377, "愿": 1378, "慈": 1379, "慌": 1380, "慎": 1381, "慑": 1382, "慕": 1383, "慢": 1384, "慧": 1385, "慨": 1386, "慰": 1387, "慷": 1388, "慾": 1389, "憍": 1390, "憧": 1391, "憨": 1392, "憩": 1393, "憬": 1394, "憾": 1395, "懂": 1396, "懈": 1397, "懋": 1398, "懒": 1399, "懔": 1400, "懦": 1401, "懿": 1402, "戈": 1403, "戊": 1404, "戌": 1405, "戍": 1406, "戎": 1407, "戏": 1408, "成": 1409, "我": 1410, "戒": 1411, "或": 1412, "战": 1413, "戚": 1414, "戛": 1415, "戟": 1416, "截": 1417, "戮": 1418, "戴": 1419, "户": 1420, "房": 1421, "所": 1422, "扁": 1423, "扇": 1424, "扈": 1425, "扉": 1426, "手": 1427, "才": 1428, "扎": 1429, "扑": 1430, "打": 1431, "扔": 1432, "托": 1433, "扣": 1434, "执": 1435, "扩": 1436, "扫": 1437, "扬": 1438, "扭": 1439, "扮": 1440, "扯": 1441, "扰": 1442, "扶": 1443, "批": 1444, "找": 1445, "承": 1446, "技": 1447, "抄": 1448, "把": 1449, "抑": 1450, "抒": 1451, "抓": 1452, "投": 1453, "抖": 1454, "抗": 1455, "折": 1456, "抚": 1457, "抛": 1458, "抢": 1459, "护": 1460, "报": 1461, "披": 1462, "抬": 1463, "抱": 1464, "抵": 1465, "抹": 1466, "押": 1467, "抽": 1468, "拂": 1469, "担": 1470, "拆": 1471, "拈": 1472, "拉": 1473, "拌": 1474, "拍": 1475, "拐": 1476, "拒": 1477, "拓": 1478, "拔": 1479, "拖": 1480, "拗": 1481, "拘": 1482, "招": 1483, "拜": 1484, "拟": 1485, "拢": 1486, "拣": 1487, "拥": 1488, "拦": 1489, "拨": 1490, "择": 1491, "括": 1492, "拮": 1493, "拯": 1494, "拱": 1495, "拳": 1496, "拷": 1497, "拼": 1498, "拾": 1499, "拿": 1500, "持": 1501, "挂": 1502, "指": 1503, "按": 1504, "挑": 1505, "挖": 1506, "挚": 1507, "挛": 1508, "挝": 1509, "挞": 1510, "挟": 1511, "挡": 1512, "挤": 1513, "挥": 1514, "挪": 1515, "挫": 1516, "振": 1517, "挹": 1518, "挺": 1519, "挽": 1520, "捆": 1521, "捉": 1522, "捍": 1523, "捏": 1524, "捐": 1525, "捕": 1526, "捞": 1527, "损": 1528, "捡": 1529, "换": 1530, "捣": 1531, "捧": 1532, "据": 1533, "捷": 1534, "掀": 1535, "授": 1536, "掉": 1537, "掌": 1538, "掏": 1539, "排": 1540, "掖": 1541, "掘": 1542, "掛": 1543, "掠": 1544, "探": 1545, "接": 1546, "控": 1547, "推": 1548, "掩": 1549, "措": 1550, "掳": 1551, "掷": 1552, "掸": 1553, "掾": 
1554, "揆": 1555, "揉": 1556, "揍": 1557, "描": 1558, "提": 1559, "插": 1560, "揖": 1561, "握": 1562, "揣": 1563, "揭": 1564, "援": 1565, "揷": 1566, "揽": 1567, "搁": 1568, "搅": 1569, "搏": 1570, "搜": 1571, "搞": 1572, "搪": 1573, "搬": 1574, "搭": 1575, "携": 1576, "摄": 1577, "摆": 1578, "摇": 1579, "摊": 1580, "摔": 1581, "摘": 1582, "摧": 1583, "摩": 1584, "摸": 1585, "摹": 1586, "摺": 1587, "撑": 1588, "撒": 1589, "撕": 1590, "撞": 1591, "撤": 1592, "播": 1593, "撮": 1594, "撰": 1595, "撼": 1596, "擂": 1597, "擅": 1598, "操": 1599, "擎": 1600, "擒": 1601, "擢": 1602, "擦": 1603, "攀": 1604, "攒": 1605, "攥": 1606, "支": 1607, "收": 1608, "攸": 1609, "改": 1610, "攻": 1611, "放": 1612, "政": 1613, "故": 1614, "效": 1615, "敌": 1616, "敏": 1617, "救": 1618, "敕": 1619, "敖": 1620, "教": 1621, "敛": 1622, "敞": 1623, "敢": 1624, "散": 1625, "敦": 1626, "敬": 1627, "数": 1628, "敲": 1629, "整": 1630, "敷": 1631, "文": 1632, "斋": 1633, "斌": 1634, "斐": 1635, "斑": 1636, "斗": 1637, "料": 1638, "斛": 1639, "斜": 1640, "斡": 1641, "斤": 1642, "斥": 1643, "斧": 1644, "斩": 1645, "断": 1646, "斯": 1647, "新": 1648, "方": 1649, "於": 1650, "施": 1651, "旁": 1652, "旅": 1653, "旆": 1654, "旋": 1655, "旌": 1656, "族": 1657, "旗": 1658, "无": 1659, "既": 1660, "日": 1661, "旦": 1662, "旧": 1663, "旨": 1664, "早": 1665, "旬": 1666, "旭": 1667, "旱": 1668, "时": 1669, "旺": 1670, "昀": 1671, "昂": 1672, "昆": 1673, "昇": 1674, "昊": 1675, "昌": 1676, "明": 1677, "昏": 1678, "易": 1679, "昔": 1680, "昕": 1681, "昙": 1682, "星": 1683, "映": 1684, "春": 1685, "昧": 1686, "昨": 1687, "昭": 1688, "是": 1689, "昴": 1690, "昵": 1691, "昶": 1692, "昼": 1693, "显": 1694, "晁": 1695, "晃": 1696, "晊": 1697, "晋": 1698, "晏": 1699, "晒": 1700, "晓": 1701, "晔": 1702, "晕": 1703, "晖": 1704, "晚": 1705, "晟": 1706, "晤": 1707, "晦": 1708, "晨": 1709, "普": 1710, "景": 1711, "晰": 1712, "晴": 1713, "晶": 1714, "智": 1715, "暂": 1716, "暄": 1717, "暅": 1718, "暎": 1719, "暑": 1720, "暖": 1721, "暗": 1722, "暨": 1723, "暮": 1724, "暴": 1725, "暹": 1726, "曙": 1727, "曜": 1728, "曝": 1729, "曦": 1730, "曰": 1731, "曲": 1732, "曳": 1733, "更": 1734, "曷": 1735, 
"曹": 1736, "曼": 1737, "曾": 1738, "替": 1739, "最": 1740, "月": 1741, "有": 1742, "朋": 1743, "服": 1744, "朔": 1745, "朕": 1746, "朗": 1747, "望": 1748, "朝": 1749, "期": 1750, "木": 1751, "未": 1752, "末": 1753, "本": 1754, "札": 1755, "术": 1756, "朱": 1757, "朴": 1758, "朵": 1759, "机": 1760, "朽": 1761, "杀": 1762, "杂": 1763, "权": 1764, "杆": 1765, "杉": 1766, "李": 1767, "杏": 1768, "材": 1769, "村": 1770, "杓": 1771, "杖": 1772, "杙": 1773, "杜": 1774, "束": 1775, "杠": 1776, "条": 1777, "来": 1778, "杨": 1779, "杭": 1780, "杯": 1781, "杰": 1782, "杻": 1783, "松": 1784, "板": 1785, "极": 1786, "构": 1787, "枋": 1788, "析": 1789, "枕": 1790, "林": 1791, "枚": 1792, "果": 1793, "枝": 1794, "枞": 1795, "枢": 1796, "枣": 1797, "枨": 1798, "枪": 1799, "枫": 1800, "枭": 1801, "枯": 1802, "枳": 1803, "架": 1804, "柃": 1805, "柄": 1806, "柏": 1807, "某": 1808, "柑": 1809, "染": 1810, "柔": 1811, "柚": 1812, "柜": 1813, "柝": 1814, "柞": 1815, "柠": 1816, "查": 1817, "柩": 1818, "柬": 1819, "柯": 1820, "柰": 1821, "柱": 1822, "柳": 1823, "柴": 1824, "査": 1825, "柽": 1826, "柿": 1827, "栀": 1828, "栃": 1829, "栅": 1830, "标": 1831, "栈": 1832, "栉": 1833, "栋": 1834, "栎": 1835, "栏": 1836, "树": 1837, "栓": 1838, "栖": 1839, "栗": 1840, "校": 1841, "栢": 1842, "栩": 1843, "株": 1844, "栲": 1845, "栳": 1846, "样": 1847, "核": 1848, "根": 1849, "栻": 1850, "格": 1851, "栽": 1852, "栾": 1853, "桀": 1854, "桂": 1855, "桃": 1856, "桄": 1857, "桅": 1858, "框": 1859, "案": 1860, "桉": 1861, "桌": 1862, "桐": 1863, "桑": 1864, "桓": 1865, "桔": 1866, "桕": 1867, "桝": 1868, "桡": 1869, "桢": 1870, "档": 1871, "桤": 1872, "桥": 1873, "桦": 1874, "桧": 1875, "桨": 1876, "桩": 1877, "桫": 1878, "桶": 1879, "梁": 1880, "梅": 1881, "梓": 1882, "梗": 1883, "梢": 1884, "梣": 1885, "梦": 1886, "梧": 1887, "梨": 1888, "梭": 1889, "梯": 1890, "械": 1891, "梳": 1892, "梵": 1893, "梾": 1894, "检": 1895, "棁": 1896, "棉": 1897, "棋": 1898, "棍": 1899, "棒": 1900, "棕": 1901, "棘": 1902, "棚": 1903, "棠": 1904, "棣": 1905, "棨": 1906, "森": 1907, "棱": 1908, "棵": 1909, "棹": 1910, "棺": 1911, "棻": 1912, "椅": 1913, "椋": 1914, "植": 1915, "椎": 1916, "椒": 
1917, "検": 1918, "椤": 1919, "椭": 1920, "椰": 1921, "椴": 1922, "椹": 1923, "椿": 1924, "楔": 1925, "楚": 1926, "楝": 1927, "楞": 1928, "楠": 1929, "楣": 1930, "楦": 1931, "楫": 1932, "楮": 1933, "楯": 1934, "楷": 1935, "楸": 1936, "楹": 1937, "楼": 1938, "概": 1939, "榄": 1940, "榆": 1941, "榈": 1942, "榉": 1943, "榔": 1944, "榕": 1945, "榖": 1946, "榙": 1947, "榛": 1948, "榜": 1949, "榧": 1950, "榨": 1951, "榭": 1952, "榴": 1953, "榻": 1954, "槐": 1955, "槚": 1956, "槛": 1957, "槟": 1958, "槭": 1959, "槱": 1960, "槲": 1961, "槽": 1962, "槿": 1963, "樊": 1964, "樟": 1965, "模": 1966, "樨": 1967, "横": 1968, "樱": 1969, "樵": 1970, "樽": 1971, "樾": 1972, "橄": 1973, "橇": 1974, "橐": 1975, "橘": 1976, "橙": 1977, "橡": 1978, "檀": 1979, "檐": 1980, "檗": 1981, "檬": 1982, "欠": 1983, "次": 1984, "欢": 1985, "欣": 1986, "欧": 1987, "欲": 1988, "欸": 1989, "欺": 1990, "款": 1991, "歆": 1992, "歇": 1993, "歉": 1994, "歌": 1995, "歙": 1996, "止": 1997, "正": 1998, "此": 1999, "步": 2000, "武": 2001, "歧": 2002, "歪": 2003, "歹": 2004, "死": 2005, "歼": 2006, "殃": 2007, "殆": 2008, "殉": 2009, "殊": 2010, "残": 2011, "殖": 2012, "殡": 2013, "殴": 2014, "段": 2015, "殷": 2016, "殿": 2017, "毁": 2018, "毅": 2019, "毋": 2020, "母": 2021, "每": 2022, "毒": 2023, "毓": 2024, "比": 2025, "毕": 2026, "毗": 2027, "毙": 2028, "毛": 2029, "毡": 2030, "毫": 2031, "氏": 2032, "民": 2033, "氓": 2034, "气": 2035, "氖": 2036, "氙": 2037, "氛": 2038, "氟": 2039, "氡": 2040, "氢": 2041, "氦": 2042, "氧": 2043, "氨": 2044, "氮": 2045, "氯": 2046, "氰": 2047, "水": 2048, "永": 2049, "汀": 2050, "汁": 2051, "求": 2052, "汇": 2053, "汉": 2054, "汐": 2055, "汕": 2056, "汗": 2057, "汛": 2058, "汜": 2059, "汝": 2060, "汞": 2061, "江": 2062, "池": 2063, "污": 2064, "汤": 2065, "汪": 2066, "汰": 2067, "汲": 2068, "汴": 2069, "汶": 2070, "汹": 2071, "汽": 2072, "汾": 2073, "沁": 2074, "沂": 2075, "沃": 2076, "沅": 2077, "沆": 2078, "沈": 2079, "沉": 2080, "沌": 2081, "沐": 2082, "沔": 2083, "沙": 2084, "沛": 2085, "沟": 2086, "没": 2087, "沤": 2088, "沥": 2089, "沦": 2090, "沧": 2091, "沪": 2092, "沫": 2093, "沭": 2094, "沱": 2095, "河": 2096, "沸": 2097, "油": 2098, 
"治": 2099, "沼": 2100, "沽": 2101, "沾": 2102, "沿": 2103, "泄": 2104, "泉": 2105, "泊": 2106, "泌": 2107, "泓": 2108, "法": 2109, "泗": 2110, "泛": 2111, "泠": 2112, "泡": 2113, "波": 2114, "泣": 2115, "泥": 2116, "注": 2117, "泪": 2118, "泮": 2119, "泯": 2120, "泰": 2121, "泱": 2122, "泳": 2123, "泵": 2124, "泷": 2125, "泸": 2126, "泻": 2127, "泼": 2128, "泽": 2129, "泾": 2130, "洁": 2131, "洄": 2132, "洋": 2133, "洐": 2134, "洒": 2135, "洗": 2136, "洙": 2137, "洛": 2138, "洞": 2139, "洣": 2140, "津": 2141, "洪": 2142, "洮": 2143, "洱": 2144, "洲": 2145, "洵": 2146, "洹": 2147, "活": 2148, "洼": 2149, "洽": 2150, "派": 2151, "流": 2152, "浅": 2153, "浆": 2154, "浇": 2155, "浉": 2156, "浊": 2157, "测": 2158, "济": 2159, "浏": 2160, "浑": 2161, "浒": 2162, "浓": 2163, "浔": 2164, "浙": 2165, "浚": 2166, "浜": 2167, "浞": 2168, "浦": 2169, "浩": 2170, "浪": 2171, "浮": 2172, "浴": 2173, "海": 2174, "浸": 2175, "涂": 2176, "涅": 2177, "消": 2178, "涉": 2179, "涌": 2180, "涓": 2181, "涛": 2182, "涝": 2183, "涞": 2184, "涟": 2185, "涡": 2186, "润": 2187, "涧": 2188, "涨": 2189, "涩": 2190, "涪": 2191, "涯": 2192, "液": 2193, "涵": 2194, "淀": 2195, "淄": 2196, "淅": 2197, "淆": 2198, "淇": 2199, "淋": 2200, "淑": 2201, "淖": 2202, "淘": 2203, "淞": 2204, "淡": 2205, "淤": 2206, "淫": 2207, "淮": 2208, "淯": 2209, "深": 2210, "淳": 2211, "混": 2212, "淹": 2213, "添": 2214, "淼": 2215, "清": 2216, "渊": 2217, "渌": 2218, "渍": 2219, "渎": 2220, "渐": 2221, "渔": 2222, "渗": 2223, "渚": 2224, "渝": 2225, "渠": 2226, "渡": 2227, "渣": 2228, "渤": 2229, "渥": 2230, "温": 2231, "渭": 2232, "港": 2233, "渲": 2234, "渴": 2235, "游": 2236, "湄": 2237, "湍": 2238, "湎": 2239, "湓": 2240, "湖": 2241, "湘": 2242, "湛": 2243, "湜": 2244, "湟": 2245, "湳": 2246, "湾": 2247, "湿": 2248, "溃": 2249, "溅": 2250, "溉": 2251, "源": 2252, "準": 2253, "溞": 2254, "溢": 2255, "溥": 2256, "溧": 2257, "溪": 2258, "溯": 2259, "溲": 2260, "溴": 2261, "溶": 2262, "溺": 2263, "滁": 2264, "滇": 2265, "滋": 2266, "滑": 2267, "滔": 2268, "滕": 2269, "滚": 2270, "滞": 2271, "满": 2272, "滤": 2273, "滥": 2274, "滦": 2275, "滨": 2276, "滩": 2277, "滴": 2278, "漂": 2279, "漆": 
2280, "漏": 2281, "演": 2282, "漕": 2283, "漠": 2284, "漩": 2285, "漪": 2286, "漫": 2287, "漳": 2288, "漾": 2289, "潇": 2290, "潍": 2291, "潘": 2292, "潜": 2293, "潞": 2294, "潢": 2295, "潦": 2296, "潭": 2297, "潮": 2298, "潼": 2299, "澄": 2300, "澈": 2301, "澍": 2302, "澎": 2303, "澜": 2304, "澡": 2305, "澥": 2306, "澧": 2307, "澳": 2308, "澶": 2309, "激": 2310, "濂": 2311, "濉": 2312, "濑": 2313, "濒": 2314, "濠": 2315, "濡": 2316, "濮": 2317, "濯": 2318, "瀑": 2319, "瀚": 2320, "瀛": 2321, "瀼": 2322, "灌": 2323, "灏": 2324, "火": 2325, "灭": 2326, "灯": 2327, "灰": 2328, "灵": 2329, "灶": 2330, "灸": 2331, "灼": 2332, "灾": 2333, "灿": 2334, "炀": 2335, "炉": 2336, "炎": 2337, "炒": 2338, "炔": 2339, "炕": 2340, "炖": 2341, "炜": 2342, "炫": 2343, "炬": 2344, "炭": 2345, "炮": 2346, "炳": 2347, "炸": 2348, "点": 2349, "炼": 2350, "炽": 2351, "烁": 2352, "烂": 2353, "烃": 2354, "烈": 2355, "烘": 2356, "烙": 2357, "烛": 2358, "烟": 2359, "烤": 2360, "烦": 2361, "烧": 2362, "烨": 2363, "热": 2364, "烯": 2365, "烷": 2366, "烹": 2367, "烺": 2368, "烽": 2369, "焉": 2370, "焊": 2371, "焕": 2372, "焙": 2373, "焚": 2374, "焦": 2375, "焮": 2376, "焯": 2377, "焰": 2378, "焱": 2379, "然": 2380, "煊": 2381, "煌": 2382, "煎": 2383, "煜": 2384, "煞": 2385, "煤": 2386, "煦": 2387, "照": 2388, "煮": 2389, "煲": 2390, "煽": 2391, "熄": 2392, "熈": 2393, "熊": 2394, "熔": 2395, "熙": 2396, "熟": 2397, "熠": 2398, "熬": 2399, "熹": 2400, "燃": 2401, "燏": 2402, "燕": 2403, "燥": 2404, "燮": 2405, "燹": 2406, "爆": 2407, "爪": 2408, "爬": 2409, "爱": 2410, "爵": 2411, "父": 2412, "爷": 2413, "爸": 2414, "爹": 2415, "爽": 2416, "牁": 2417, "牂": 2418, "片": 2419, "版": 2420, "牌": 2421, "牕": 2422, "牙": 2423, "牛": 2424, "牟": 2425, "牡": 2426, "牢": 2427, "牦": 2428, "牧": 2429, "物": 2430, "牲": 2431, "牵": 2432, "特": 2433, "牺": 2434, "牻": 2435, "犀": 2436, "犁": 2437, "犍": 2438, "犬": 2439, "犯": 2440, "状": 2441, "犷": 2442, "犹": 2443, "狂": 2444, "狄": 2445, "狐": 2446, "狒": 2447, "狗": 2448, "狙": 2449, "狠": 2450, "狡": 2451, "狩": 2452, "独": 2453, "狭": 2454, "狮": 2455, "狯": 2456, "狱": 2457, "狷": 2458, "狸": 2459, "狼": 2460, "猄": 2461, 
"猎": 2462, "猖": 2463, "猗": 2464, "猛": 2465, "猜": 2466, "猝": 2467, "猩": 2468, "猪": 2469, "猫": 2470, "猬": 2471, "献": 2472, "猴": 2473, "猾": 2474, "猿": 2475, "獐": 2476, "獗": 2477, "獭": 2478, "獴": 2479, "玄": 2480, "率": 2481, "玉": 2482, "王": 2483, "玎": 2484, "玑": 2485, "玖": 2486, "玛": 2487, "玠": 2488, "玩": 2489, "玫": 2490, "玭": 2491, "玮": 2492, "环": 2493, "现": 2494, "玲": 2495, "玶": 2496, "玹": 2497, "玺": 2498, "玻": 2499, "珀": 2500, "珂": 2501, "珈": 2502, "珊": 2503, "珍": 2504, "珐": 2505, "珑": 2506, "珙": 2507, "珞": 2508, "珠": 2509, "珩": 2510, "班": 2511, "珰": 2512, "珲": 2513, "珺": 2514, "球": 2515, "琅": 2516, "理": 2517, "琉": 2518, "琊": 2519, "琏": 2520, "琐": 2521, "琚": 2522, "琛": 2523, "琢": 2524, "琥": 2525, "琦": 2526, "琨": 2527, "琪": 2528, "琬": 2529, "琮": 2530, "琰": 2531, "琳": 2532, "琴": 2533, "琵": 2534, "琶": 2535, "琼": 2536, "瑀": 2537, "瑄": 2538, "瑙": 2539, "瑚": 2540, "瑛": 2541, "瑜": 2542, "瑞": 2543, "瑟": 2544, "瑭": 2545, "瑮": 2546, "瑰": 2547, "瑳": 2548, "瑶": 2549, "瑷": 2550, "瑾": 2551, "璃": 2552, "璆": 2553, "璇": 2554, "璋": 2555, "璎": 2556, "璜": 2557, "璟": 2558, "璧": 2559, "璹": 2560, "瓒": 2561, "瓘": 2562, "瓛": 2563, "瓜": 2564, "瓢": 2565, "瓣": 2566, "瓦": 2567, "瓮": 2568, "瓯": 2569, "瓶": 2570, "瓷": 2571, "甄": 2572, "甘": 2573, "甚": 2574, "甜": 2575, "生": 2576, "甥": 2577, "用": 2578, "甫": 2579, "甬": 2580, "田": 2581, "由": 2582, "甲": 2583, "申": 2584, "电": 2585, "男": 2586, "甸": 2587, "町": 2588, "画": 2589, "甾": 2590, "畅": 2591, "畈": 2592, "畋": 2593, "界": 2594, "畏": 2595, "畔": 2596, "留": 2597, "畜": 2598, "略": 2599, "番": 2600, "畲": 2601, "畴": 2602, "畸": 2603, "畿": 2604, "疃": 2605, "疆": 2606, "疍": 2607, "疏": 2608, "疑": 2609, "疖": 2610, "疗": 2611, "疟": 2612, "疡": 2613, "疣": 2614, "疤": 2615, "疫": 2616, "疮": 2617, "疯": 2618, "疲": 2619, "疹": 2620, "疼": 2621, "疾": 2622, "病": 2623, "症": 2624, "痉": 2625, "痒": 2626, "痕": 2627, "痘": 2628, "痛": 2629, "痢": 2630, "痪": 2631, "痫": 2632, "痴": 2633, "痹": 2634, "痼": 2635, "瘟": 2636, "瘤": 2637, "瘦": 2638, "瘫": 2639, "瘰": 2640, "瘾": 2641, "瘿": 2642, "癌": 
2643, "癣": 2644, "癫": 2645, "癸": 2646, "登": 2647, "白": 2648, "百": 2649, "皂": 2650, "的": 2651, "皆": 2652, "皇": 2653, "皋": 2654, "皓": 2655, "皕": 2656, "皖": 2657, "皮": 2658, "皱": 2659, "皿": 2660, "盂": 2661, "盆": 2662, "盈": 2663, "益": 2664, "盏": 2665, "盐": 2666, "监": 2667, "盒": 2668, "盔": 2669, "盖": 2670, "盗": 2671, "盘": 2672, "盛": 2673, "盟": 2674, "盥": 2675, "目": 2676, "盯": 2677, "盱": 2678, "盲": 2679, "直": 2680, "相": 2681, "盼": 2682, "盾": 2683, "省": 2684, "眉": 2685, "看": 2686, "県": 2687, "眙": 2688, "真": 2689, "眠": 2690, "眩": 2691, "眶": 2692, "眷": 2693, "眺": 2694, "眼": 2695, "着": 2696, "睁": 2697, "睐": 2698, "睛": 2699, "睡": 2700, "睢": 2701, "督": 2702, "睦": 2703, "睫": 2704, "睹": 2705, "睽": 2706, "睾": 2707, "睿": 2708, "瞄": 2709, "瞎": 2710, "瞒": 2711, "瞧": 2712, "瞩": 2713, "瞬": 2714, "瞭": 2715, "瞳": 2716, "瞻": 2717, "瞽": 2718, "瞿": 2719, "矍": 2720, "矗": 2721, "矛": 2722, "矢": 2723, "矣": 2724, "知": 2725, "矩": 2726, "矫": 2727, "短": 2728, "矮": 2729, "石": 2730, "矶": 2731, "矾": 2732, "矿": 2733, "砀": 2734, "码": 2735, "砂": 2736, "砌": 2737, "砍": 2738, "砒": 2739, "研": 2740, "砖": 2741, "砗": 2742, "砚": 2743, "砥": 2744, "破": 2745, "砵": 2746, "砷": 2747, "砸": 2748, "砻": 2749, "砾": 2750, "础": 2751, "硅": 2752, "硒": 2753, "硕": 2754, "硖": 2755, "硝": 2756, "硫": 2757, "硬": 2758, "确": 2759, "硼": 2760, "碉": 2761, "碍": 2762, "碎": 2763, "碑": 2764, "碗": 2765, "碘": 2766, "碟": 2767, "碧": 2768, "碰": 2769, "碱": 2770, "碲": 2771, "碳": 2772, "碾": 2773, "磁": 2774, "磅": 2775, "磐": 2776, "磡": 2777, "磨": 2778, "磲": 2779, "磷": 2780, "礁": 2781, "示": 2782, "礼": 2783, "社": 2784, "祀": 2785, "祁": 2786, "祈": 2787, "祉": 2788, "祋": 2789, "祎": 2790, "祕": 2791, "祖": 2792, "祗": 2793, "祚": 2794, "祛": 2795, "祜": 2796, "祝": 2797, "神": 2798, "祠": 2799, "祥": 2800, "票": 2801, "祭": 2802, "祯": 2803, "祷": 2804, "祸": 2805, "祺": 2806, "禁": 2807, "禄": 2808, "禅": 2809, "福": 2810, "禑": 2811, "禧": 2812, "禹": 2813, "禺": 2814, "离": 2815, "禾": 2816, "秀": 2817, "私": 2818, "秆": 2819, "秉": 2820, "秋": 2821, "种": 2822, "科": 2823, "秒": 2824, 
"秘": 2825, "租": 2826, "秤": 2827, "秦": 2828, "秧": 2829, "秩": 2830, "积": 2831, "称": 2832, "移": 2833, "秽": 2834, "稀": 2835, "稃": 2836, "程": 2837, "稍": 2838, "税": 2839, "稔": 2840, "稗": 2841, "稚": 2842, "稞": 2843, "稠": 2844, "稣": 2845, "稳": 2846, "稷": 2847, "稺": 2848, "稻": 2849, "稼": 2850, "稽": 2851, "稿": 2852, "穆": 2853, "穗": 2854, "穴": 2855, "究": 2856, "穷": 2857, "穹": 2858, "空": 2859, "穿": 2860, "突": 2861, "窃": 2862, "窄": 2863, "窈": 2864, "窑": 2865, "窒": 2866, "窕": 2867, "窖": 2868, "窗": 2869, "窜": 2870, "窝": 2871, "窟": 2872, "窥": 2873, "窦": 2874, "竈": 2875, "立": 2876, "竖": 2877, "站": 2878, "竞": 2879, "竟": 2880, "章": 2881, "竣": 2882, "童": 2883, "竭": 2884, "端": 2885, "竹": 2886, "竺": 2887, "竿": 2888, "笃": 2889, "笄": 2890, "笆": 2891, "笋": 2892, "笏": 2893, "笑": 2894, "笔": 2895, "笙": 2896, "笛": 2897, "笞": 2898, "笠": 2899, "符": 2900, "笨": 2901, "第": 2902, "笮": 2903, "笼": 2904, "筅": 2905, "等": 2906, "筋": 2907, "筐": 2908, "筑": 2909, "筒": 2910, "答": 2911, "策": 2912, "筛": 2913, "筮": 2914, "筱": 2915, "筲": 2916, "筷": 2917, "筹": 2918, "签": 2919, "简": 2920, "箕": 2921, "算": 2922, "管": 2923, "箨": 2924, "箩": 2925, "箬": 2926, "箭": 2927, "箱": 2928, "箴": 2929, "篆": 2930, "篇": 2931, "篙": 2932, "篡": 2933, "篦": 2934, "篮": 2935, "篱": 2936, "篷": 2937, "簇": 2938, "簕": 2939, "簧": 2940, "簪": 2941, "簸": 2942, "簽": 2943, "簿": 2944, "籁": 2945, "籍": 2946, "米": 2947, "籴": 2948, "类": 2949, "籽": 2950, "粉": 2951, "粒": 2952, "粗": 2953, "粘": 2954, "粟": 2955, "粤": 2956, "粥": 2957, "粪": 2958, "粮": 2959, "粲": 2960, "粹": 2961, "精": 2962, "糊": 2963, "糕": 2964, "糖": 2965, "糙": 2966, "糟": 2967, "糠": 2968, "糯": 2969, "系": 2970, "紊": 2971, "紑": 2972, "素": 2973, "索": 2974, "紧": 2975, "紫": 2976, "累": 2977, "絮": 2978, "綖": 2979, "綦": 2980, "緁": 2981, "縻": 2982, "繁": 2983, "纂": 2984, "纠": 2985, "红": 2986, "纤": 2987, "纥": 2988, "约": 2989, "级": 2990, "纪": 2991, "纬": 2992, "纭": 2993, "纮": 2994, "纯": 2995, "纱": 2996, "纲": 2997, "纳": 2998, "纵": 2999, "纶": 3000, "纷": 3001, "纸": 3002, "纹": 3003, "纺": 3004, "纻": 3005, "纽": 
3006, "线": 3007, "绂": 3008, "练": 3009, "组": 3010, "绅": 3011, "细": 3012, "织": 3013, "终": 3014, "绊": 3015, "绍": 3016, "绎": 3017, "经": 3018, "绑": 3019, "绒": 3020, "结": 3021, "绕": 3022, "绘": 3023, "给": 3024, "绚": 3025, "绛": 3026, "络": 3027, "绝": 3028, "绞": 3029, "统": 3030, "绡": 3031, "绢": 3032, "绣": 3033, "绥": 3034, "绦": 3035, "继": 3036, "绩": 3037, "绪": 3038, "绫": 3039, "续": 3040, "绮": 3041, "绯": 3042, "绰": 3043, "绳": 3044, "维": 3045, "绵": 3046, "绶": 3047, "绸": 3048, "综": 3049, "绽": 3050, "绿": 3051, "缀": 3052, "缅": 3053, "缆": 3054, "缇": 3055, "缉": 3056, "缓": 3057, "缔": 3058, "缕": 3059, "编": 3060, "缘": 3061, "缙": 3062, "缚": 3063, "缜": 3064, "缝": 3065, "缠": 3066, "缨": 3067, "缩": 3068, "缪": 3069, "缮": 3070, "缴": 3071, "缵": 3072, "缸": 3073, "缺": 3074, "罂": 3075, "罄": 3076, "罐": 3077, "网": 3078, "罔": 3079, "罕": 3080, "罗": 3081, "罘": 3082, "罚": 3083, "罢": 3084, "罩": 3085, "罪": 3086, "置": 3087, "署": 3088, "罹": 3089, "罽": 3090, "羁": 3091, "羊": 3092, "羌": 3093, "美": 3094, "羚": 3095, "羞": 3096, "羟": 3097, "羡": 3098, "群": 3099, "羧": 3100, "羯": 3101, "羰": 3102, "羲": 3103, "羽": 3104, "翁": 3105, "翃": 3106, "翅": 3107, "翊": 3108, "翌": 3109, "翎": 3110, "翔": 3111, "翘": 3112, "翟": 3113, "翠": 3114, "翡": 3115, "翥": 3116, "翦": 3117, "翰": 3118, "翱": 3119, "翻": 3120, "翼": 3121, "翽": 3122, "耀": 3123, "老": 3124, "考": 3125, "者": 3126, "耆": 3127, "而": 3128, "耍": 3129, "耐": 3130, "耕": 3131, "耗": 3132, "耘": 3133, "耦": 3134, "耧": 3135, "耨": 3136, "耳": 3137, "耶": 3138, "耸": 3139, "耻": 3140, "耽": 3141, "耿": 3142, "聂": 3143, "聆": 3144, "聊": 3145, "聋": 3146, "职": 3147, "联": 3148, "聘": 3149, "聚": 3150, "聪": 3151, "聿": 3152, "肃": 3153, "肄": 3154, "肆": 3155, "肇": 3156, "肉": 3157, "肋": 3158, "肌": 3159, "肖": 3160, "肘": 3161, "肚": 3162, "肛": 3163, "肝": 3164, "肟": 3165, "肠": 3166, "股": 3167, "肢": 3168, "肤": 3169, "肥": 3170, "肩": 3171, "肪": 3172, "肯": 3173, "肱": 3174, "育": 3175, "肴": 3176, "肺": 3177, "肼": 3178, "肽": 3179, "肾": 3180, "肿": 3181, "胀": 3182, "胁": 3183, "胃": 3184, "胄": 3185, "胆": 3186, "背": 3187, 
"胍": 3188, "胎": 3189, "胖": 3190, "胚": 3191, "胛": 3192, "胜": 3193, "胞": 3194, "胡": 3195, "胤": 3196, "胥": 3197, "胪": 3198, "胫": 3199, "胭": 3200, "胰": 3201, "胱": 3202, "胳": 3203, "胶": 3204, "胸": 3205, "胺": 3206, "胼": 3207, "能": 3208, "脂": 3209, "脆": 3210, "脉": 3211, "脊": 3212, "脏": 3213, "脐": 3214, "脑": 3215, "脓": 3216, "脖": 3217, "脚": 3218, "脱": 3219, "脲": 3220, "脸": 3221, "脾": 3222, "腈": 3223, "腊": 3224, "腌": 3225, "腐": 3226, "腓": 3227, "腔": 3228, "腕": 3229, "腥": 3230, "腧": 3231, "腭": 3232, "腮": 3233, "腰": 3234, "腱": 3235, "腹": 3236, "腺": 3237, "腻": 3238, "腾": 3239, "腿": 3240, "膀": 3241, "膈": 3242, "膊": 3243, "膏": 3244, "膑": 3245, "膛": 3246, "膜": 3247, "膝": 3248, "膦": 3249, "膨": 3250, "膳": 3251, "膺": 3252, "膻": 3253, "臀": 3254, "臂": 3255, "臣": 3256, "臧": 3257, "自": 3258, "臭": 3259, "臯": 3260, "至": 3261, "致": 3262, "臻": 3263, "臼": 3264, "舄": 3265, "舅": 3266, "舆": 3267, "舌": 3268, "舍": 3269, "舒": 3270, "舘": 3271, "舜": 3272, "舞": 3273, "舟": 3274, "航": 3275, "舫": 3276, "般": 3277, "舰": 3278, "舱": 3279, "舶": 3280, "船": 3281, "艇": 3282, "艘": 3283, "艮": 3284, "良": 3285, "艰": 3286, "色": 3287, "艳": 3288, "艺": 3289, "艾": 3290, "节": 3291, "芃": 3292, "芈": 3293, "芊": 3294, "芋": 3295, "芎": 3296, "芒": 3297, "芗": 3298, "芙": 3299, "芜": 3300, "芝": 3301, "芥": 3302, "芦": 3303, "芩": 3304, "芪": 3305, "芬": 3306, "芭": 3307, "芮": 3308, "芯": 3309, "芰": 3310, "花": 3311, "芳": 3312, "芷": 3313, "芸": 3314, "芹": 3315, "芽": 3316, "苁": 3317, "苄": 3318, "苇": 3319, "苈": 3320, "苋": 3321, "苌": 3322, "苍": 3323, "苎": 3324, "苏": 3325, "苑": 3326, "苓": 3327, "苔": 3328, "苗": 3329, "苛": 3330, "苞": 3331, "苟": 3332, "苡": 3333, "苣": 3334, "若": 3335, "苦": 3336, "苯": 3337, "英": 3338, "苳": 3339, "苴": 3340, "苷": 3341, "苹": 3342, "苻": 3343, "苾": 3344, "茂": 3345, "范": 3346, "茄": 3347, "茅": 3348, "茉": 3349, "茌": 3350, "茎": 3351, "茔": 3352, "茛": 3353, "茜": 3354, "茧": 3355, "茨": 3356, "茫": 3357, "茯": 3358, "茱": 3359, "茵": 3360, "茶": 3361, "茸": 3362, "茹": 3363, "荀": 3364, "荁": 3365, "荃": 3366, "荆": 3367, "草": 3368, "荐": 
3369, "荒": 3370, "荔": 3371, "荚": 3372, "荛": 3373, "荞": 3374, "荠": 3375, "荡": 3376, "荣": 3377, "荥": 3378, "荦": 3379, "荧": 3380, "荨": 3381, "荩": 3382, "荪": 3383, "荫": 3384, "药": 3385, "荷": 3386, "荸": 3387, "荼": 3388, "荽": 3389, "莅": 3390, "莆": 3391, "莉": 3392, "莎": 3393, "莒": 3394, "莓": 3395, "莘": 3396, "莞": 3397, "莩": 3398, "莪": 3399, "莫": 3400, "莱": 3401, "莲": 3402, "莴": 3403, "获": 3404, "莸": 3405, "莹": 3406, "莺": 3407, "莼": 3408, "莽": 3409, "菀": 3410, "菁": 3411, "菅": 3412, "菇": 3413, "菉": 3414, "菊": 3415, "菌": 3416, "菏": 3417, "菖": 3418, "菜": 3419, "菝": 3420, "菠": 3421, "菩": 3422, "菰": 3423, "菱": 3424, "菲": 3425, "萁": 3426, "萄": 3427, "萌": 3428, "萍": 3429, "萎": 3430, "萘": 3431, "萜": 3432, "萝": 3433, "萤": 3434, "营": 3435, "萦": 3436, "萧": 3437, "萨": 3438, "萱": 3439, "萸": 3440, "萼": 3441, "落": 3442, "葆": 3443, "葎": 3444, "著": 3445, "葛": 3446, "葜": 3447, "葡": 3448, "董": 3449, "葫": 3450, "葬": 3451, "葱": 3452, "葳": 3453, "葵": 3454, "葶": 3455, "蒂": 3456, "蒋": 3457, "蒙": 3458, "蒜": 3459, "蒟": 3460, "蒲": 3461, "蒴": 3462, "蒸": 3463, "蒺": 3464, "蒿": 3465, "蓄": 3466, "蓉": 3467, "蓍": 3468, "蓝": 3469, "蓟": 3470, "蓣": 3471, "蓬": 3472, "蓼": 3473, "蔑": 3474, "蔓": 3475, "蔗": 3476, "蔚": 3477, "蔡": 3478, "蔬": 3479, "蔵": 3480, "蔷": 3481, "蔺": 3482, "蔻": 3483, "蔽": 3484, "蕃": 3485, "蕈": 3486, "蕉": 3487, "蕊": 3488, "蕨": 3489, "蕲": 3490, "蕴": 3491, "蕾": 3492, "薄": 3493, "薇": 3494, "薖": 3495, "薛": 3496, "薨": 3497, "薪": 3498, "薮": 3499, "薯": 3500, "薹": 3501, "藁": 3502, "藉": 3503, "藏": 3504, "藓": 3505, "藔": 3506, "藕": 3507, "藜": 3508, "藤": 3509, "藨": 3510, "藩": 3511, "藳": 3512, "藻": 3513, "藿": 3514, "蘑": 3515, "蘸": 3516, "虎": 3517, "虏": 3518, "虐": 3519, "虑": 3520, "虔": 3521, "虚": 3522, "虞": 3523, "虫": 3524, "虬": 3525, "虱": 3526, "虹": 3527, "虻": 3528, "虽": 3529, "虾": 3530, "蚀": 3531, "蚁": 3532, "蚂": 3533, "蚊": 3534, "蚌": 3535, "蚓": 3536, "蚕": 3537, "蚜": 3538, "蚣": 3539, "蚤": 3540, "蚨": 3541, "蚪": 3542, "蚬": 3543, "蚯": 3544, "蚶": 3545, "蚺": 3546, "蛄": 3547, "蛇": 3548, "蛉": 3549, "蛊": 3550, 
"蛋": 3551, "蛎": 3552, "蛏": 3553, "蛙": 3554, "蛛": 3555, "蛤": 3556, "蛭": 3557, "蛮": 3558, "蛱": 3559, "蛲": 3560, "蛳": 3561, "蛸": 3562, "蛹": 3563, "蛾": 3564, "蜀": 3565, "蜂": 3566, "蜃": 3567, "蜈": 3568, "蜊": 3569, "蜍": 3570, "蜑": 3571, "蜒": 3572, "蜓": 3573, "蜕": 3574, "蜗": 3575, "蜘": 3576, "蜚": 3577, "蜜": 3578, "蜡": 3579, "蜢": 3580, "蜥": 3581, "蜱": 3582, "蜴": 3583, "蜻": 3584, "蜿": 3585, "蝇": 3586, "蝉": 3587, "蝌": 3588, "蝎": 3589, "蝙": 3590, "蝠": 3591, "蝥": 3592, "蝴": 3593, "蝶": 3594, "蝽": 3595, "蝾": 3596, "螂": 3597, "螃": 3598, "螈": 3599, "融": 3600, "螟": 3601, "螨": 3602, "螭": 3603, "螯": 3604, "螺": 3605, "蟀": 3606, "蟆": 3607, "蟋": 3608, "蟑": 3609, "蟒": 3610, "蟳": 3611, "蟹": 3612, "蟾": 3613, "蠋": 3614, "蠓": 3615, "蠕": 3616, "蠡": 3617, "蠹": 3618, "血": 3619, "衅": 3620, "行": 3621, "衍": 3622, "衔": 3623, "街": 3624, "衙": 3625, "衡": 3626, "衢": 3627, "衣": 3628, "补": 3629, "表": 3630, "衫": 3631, "衬": 3632, "衰": 3633, "衷": 3634, "袁": 3635, "袋": 3636, "袍": 3637, "袒": 3638, "袓": 3639, "袖": 3640, "袗": 3641, "袜": 3642, "被": 3643, "袭": 3644, "裁": 3645, "裂": 3646, "装": 3647, "裔": 3648, "裕": 3649, "裘": 3650, "裙": 3651, "裤": 3652, "裬": 3653, "裴": 3654, "裸": 3655, "裹": 3656, "裾": 3657, "褐": 3658, "褒": 3659, "褚": 3660, "褧": 3661, "褪": 3662, "褶": 3663, "襄": 3664, "襟": 3665, "西": 3666, "要": 3667, "覃": 3668, "覆": 3669, "见": 3670, "观": 3671, "规": 3672, "觅": 3673, "视": 3674, "览": 3675, "觉": 3676, "觐": 3677, "觑": 3678, "角": 3679, "觚": 3680, "解": 3681, "触": 3682, "言": 3683, "詥": 3684, "詹": 3685, "誉": 3686, "誓": 3687, "諡": 3688, "諲": 3689, "謇": 3690, "警": 3691, "譬": 3692, "讚": 3693, "计": 3694, "订": 3695, "讣": 3696, "认": 3697, "讨": 3698, "让": 3699, "讪": 3700, "讫": 3701, "训": 3702, "议": 3703, "讯": 3704, "记": 3705, "讲": 3706, "讳": 3707, "讶": 3708, "讷": 3709, "许": 3710, "讹": 3711, "论": 3712, "讼": 3713, "讽": 3714, "设": 3715, "访": 3716, "诀": 3717, "证": 3718, "诃": 3719, "评": 3720, "识": 3721, "诈": 3722, "诉": 3723, "诊": 3724, "词": 3725, "诏": 3726, "译": 3727, "试": 3728, "诗": 3729, "诙": 3730, "诚": 3731, "诛": 
3732, "话": 3733, "诞": 3734, "诟": 3735, "诠": 3736, "诡": 3737, "询": 3738, "诣": 3739, "该": 3740, "详": 3741, "诬": 3742, "语": 3743, "误": 3744, "诰": 3745, "诱": 3746, "说": 3747, "诵": 3748, "请": 3749, "诸": 3750, "诹": 3751, "诺": 3752, "读": 3753, "诽": 3754, "课": 3755, "谁": 3756, "调": 3757, "谅": 3758, "谈": 3759, "谊": 3760, "谋": 3761, "谌": 3762, "谍": 3763, "谎": 3764, "谏": 3765, "谐": 3766, "谒": 3767, "谓": 3768, "谕": 3769, "谗": 3770, "谚": 3771, "谛": 3772, "谜": 3773, "谟": 3774, "谠": 3775, "谢": 3776, "谣": 3777, "谤": 3778, "谥": 3779, "谦": 3780, "谨": 3781, "谪": 3782, "谬": 3783, "谭": 3784, "谯": 3785, "谱": 3786, "谲": 3787, "谴": 3788, "谶": 3789, "谷": 3790, "豁": 3791, "豆": 3792, "豇": 3793, "豌": 3794, "豚": 3795, "象": 3796, "豪": 3797, "豫": 3798, "豹": 3799, "貂": 3800, "貊": 3801, "貌": 3802, "貘": 3803, "贝": 3804, "贞": 3805, "负": 3806, "贡": 3807, "财": 3808, "责": 3809, "贤": 3810, "败": 3811, "账": 3812, "货": 3813, "质": 3814, "贩": 3815, "贪": 3816, "贫": 3817, "贬": 3818, "购": 3819, "贮": 3820, "贯": 3821, "贰": 3822, "贴": 3823, "贵": 3824, "贷": 3825, "贸": 3826, "费": 3827, "贺": 3828, "贻": 3829, "贼": 3830, "贾": 3831, "贿": 3832, "赁": 3833, "赂": 3834, "赃": 3835, "资": 3836, "赈": 3837, "赉": 3838, "赋": 3839, "赌": 3840, "赎": 3841, "赏": 3842, "赐": 3843, "赓": 3844, "赔": 3845, "赖": 3846, "赘": 3847, "赚": 3848, "赛": 3849, "赝": 3850, "赞": 3851, "赟": 3852, "赠": 3853, "赢": 3854, "赣": 3855, "赤": 3856, "赦": 3857, "赫": 3858, "赭": 3859, "走": 3860, "赴": 3861, "赵": 3862, "赶": 3863, "起": 3864, "趁": 3865, "超": 3866, "越": 3867, "趋": 3868, "趟": 3869, "趣": 3870, "足": 3871, "趴": 3872, "趺": 3873, "趾": 3874, "跃": 3875, "跆": 3876, "跋": 3877, "跌": 3878, "跑": 3879, "跖": 3880, "跗": 3881, "跚": 3882, "距": 3883, "跟": 3884, "跨": 3885, "跪": 3886, "路": 3887, "跳": 3888, "践": 3889, "跻": 3890, "踊": 3891, "踏": 3892, "踢": 3893, "踩": 3894, "踪": 3895, "蹄": 3896, "蹈": 3897, "蹒": 3898, "蹴": 3899, "蹶": 3900, "身": 3901, "躯": 3902, "躲": 3903, "车": 3904, "轧": 3905, "轨": 3906, "轩": 3907, "转": 3908, "轭": 3909, "轮": 3910, "软": 3911, "轰": 3912, "轲": 3913, 
"轴": 3914, "轶": 3915, "轸": 3916, "轻": 3917, "载": 3918, "轿": 3919, "较": 3920, "辅": 3921, "辆": 3922, "辇": 3923, "辈": 3924, "辉": 3925, "辍": 3926, "辐": 3927, "辑": 3928, "输": 3929, "辕": 3930, "辖": 3931, "辗": 3932, "辙": 3933, "辛": 3934, "辜": 3935, "辞": 3936, "辟": 3937, "辣": 3938, "辨": 3939, "辩": 3940, "辰": 3941, "辱": 3942, "边": 3943, "辻": 3944, "込": 3945, "辽": 3946, "达": 3947, "迁": 3948, "迄": 3949, "迅": 3950, "过": 3951, "迈": 3952, "迎": 3953, "运": 3954, "近": 3955, "返": 3956, "还": 3957, "这": 3958, "进": 3959, "远": 3960, "违": 3961, "连": 3962, "迟": 3963, "迥": 3964, "迦": 3965, "迪": 3966, "迫": 3967, "迭": 3968, "述": 3969, "迷": 3970, "迹": 3971, "追": 3972, "退": 3973, "送": 3974, "适": 3975, "逃": 3976, "逅": 3977, "逆": 3978, "选": 3979, "逊": 3980, "逍": 3981, "透": 3982, "逐": 3983, "递": 3984, "途": 3985, "逗": 3986, "通": 3987, "逛": 3988, "逝": 3989, "速": 3990, "造": 3991, "逡": 3992, "逢": 3993, "逮": 3994, "逵": 3995, "逸": 3996, "逻": 3997, "逼": 3998, "逾": 3999, "遁": 4000, "遂": 4001, "遇": 4002, "遍": 4003, "遏": 4004, "遐": 4005, "遑": 4006, "道": 4007, "遗": 4008, "遣": 4009, "遥": 4010, "遭": 4011, "遮": 4012, "遴": 4013, "遵": 4014, "避": 4015, "邀": 4016, "邂": 4017, "邃": 4018, "邈": 4019, "邑": 4020, "邓": 4021, "邕": 4022, "邢": 4023, "��": 4024, "邦": 4025, "邨": 4026, "邪": 4027, "邬": 4028, "邮": 4029, "邯": 4030, "邰": 4031, "邱": 4032, "邳": 4033, "邵": 4034, "邸": 4035, "邹": 4036, "邺": 4037, "邻": 4038, "郁": 4039, "郃": 4040, "郈": 4041, "郊": 4042, "郎": 4043, "郏": 4044, "郑": 4045, "郓": 4046, "郝": 4047, "郡": 4048, "郤": 4049, "郦": 4050, "部": 4051, "郭": 4052, "郯": 4053, "郴": 4054, "郸": 4055, "都": 4056, "郾": 4057, "郿": 4058, "鄂": 4059, "鄞": 4060, "鄢": 4061, "鄱": 4062, "酃": 4063, "酉": 4064, "酋": 4065, "酌": 4066, "配": 4067, "酐": 4068, "酒": 4069, "酗": 4070, "酚": 4071, "酢": 4072, "酤": 4073, "酥": 4074, "酪": 4075, "酬": 4076, "酮": 4077, "酯": 4078, "酰": 4079, "酱": 4080, "酵": 4081, "酶": 4082, "酷": 4083, "酸": 4084, "酿": 4085, "醇": 4086, "醉": 4087, "醋": 4088, "醒": 4089, "醚": 4090, "醛": 4091, "醮": 4092, "醯": 4093, "采": 4094, "釉": 
4095, "释": 4096, "里": 4097, "重": 4098, "野": 4099, "量": 4100, "金": 4101, "釜": 4102, "鉴": 4103, "銮": 4104, "鋆": 4105, "鋐": 4106, "鍊": 4107, "鎏": 4108, "鏊": 4109, "鑫": 4110, "针": 4111, "钉": 4112, "钊": 4113, "钌": 4114, "钍": 4115, "钓": 4116, "钕": 4117, "钗": 4118, "钙": 4119, "钛": 4120, "钜": 4121, "钝": 4122, "钞": 4123, "钟": 4124, "钠": 4125, "钡": 4126, "钢": 4127, "钥": 4128, "钦": 4129, "钧": 4130, "钨": 4131, "钩": 4132, "钪": 4133, "钫": 4134, "钬": 4135, "钮": 4136, "钯": 4137, "钰": 4138, "钱": 4139, "钲": 4140, "钴": 4141, "钵": 4142, "钹": 4143, "钻": 4144, "钼": 4145, "钾": 4146, "钿": 4147, "铀": 4148, "铁": 4149, "铂": 4150, "铃": 4151, "铅": 4152, "铆": 4153, "铉": 4154, "铊": 4155, "铋": 4156, "铎": 4157, "铑": 4158, "铜": 4159, "铝": 4160, "铟": 4161, "铠": 4162, "铣": 4163, "铨": 4164, "铪": 4165, "铬": 4166, "铭": 4167, "铮": 4168, "铯": 4169, "铰": 4170, "铱": 4171, "铲": 4172, "铳": 4173, "铵": 4174, "银": 4175, "铸": 4176, "铺": 4177, "铼": 4178, "铽": 4179, "链": 4180, "铿": 4181, "销": 4182, "锁": 4183, "锂": 4184, "锅": 4185, "锆": 4186, "锈": 4187, "锉": 4188, "锋": 4189, "锌": 4190, "锎": 4191, "锐": 4192, "锑": 4193, "锗": 4194, "错": 4195, "锚": 4196, "锡": 4197, "锣": 4198, "锤": 4199, "锥": 4200, "锦": 4201, "锫": 4202, "键": 4203, "锯": 4204, "锰": 4205, "锺": 4206, "锻": 4207, "镀": 4208, "镁": 4209, "镂": 4210, "镇": 4211, "镉": 4212, "镊": 4213, "镍": 4214, "镎": 4215, "镐": 4216, "镒": 4217, "镓": 4218, "镖": 4219, "镗": 4220, "镛": 4221, "镜": 4222, "镠": 4223, "镤": 4224, "镧": 4225, "镰": 4226, "镳": 4227, "镶": 4228, "长": 4229, "閒": 4230, "闍": 4231, "门": 4232, "闪": 4233, "闫": 4234, "闭": 4235, "问": 4236, "闯": 4237, "闰": 4238, "闱": 4239, "闲": 4240, "闳": 4241, "间": 4242, "闵": 4243, "闸": 4244, "闹": 4245, "闻": 4246, "闼": 4247, "闽": 4248, "闾": 4249, "阀": 4250, "阁": 4251, "阅": 4252, "阇": 4253, "阈": 4254, "阉": 4255, "阎": 4256, "阏": 4257, "阐": 4258, "阑": 4259, "阔": 4260, "阕": 4261, "阖": 4262, "阙": 4263, "阜": 4264, "队": 4265, "阪": 4266, "阮": 4267, "阱": 4268, "防": 4269, "阳": 4270, "阴": 4271, "阵": 4272, "阶": 4273, "阻": 4274, "阿": 4275, "陀": 4276, 
"陁": 4277, "陂": 4278, "附": 4279, "际": 4280, "陆": 4281, "陇": 4282, "陈": 4283, "陋": 4284, "陌": 4285, "降": 4286, "限": 4287, "陕": 4288, "陛": 4289, "陟": 4290, "陡": 4291, "院": 4292, "除": 4293, "陨": 4294, "险": 4295, "陪": 4296, "陲": 4297, "陵": 4298, "陶": 4299, "陷": 4300, "隅": 4301, "隆": 4302, "隈": 4303, "隋": 4304, "隍": 4305, "随": 4306, "隐": 4307, "隔": 4308, "隗": 4309, "隘": 4310, "隙": 4311, "障": 4312, "隧": 4313, "隶": 4314, "隼": 4315, "难": 4316, "雀": 4317, "雁": 4318, "雄": 4319, "雅": 4320, "集": 4321, "雇": 4322, "雉": 4323, "雌": 4324, "雍": 4325, "雏": 4326, "雑": 4327, "雒": 4328, "雕": 4329, "雨": 4330, "雪": 4331, "雯": 4332, "雳": 4333, "零": 4334, "雷": 4335, "雹": 4336, "雾": 4337, "需": 4338, "霄": 4339, "霆": 4340, "震": 4341, "霈": 4342, "霉": 4343, "霍": 4344, "霑": 4345, "霓": 4346, "霖": 4347, "霜": 4348, "霞": 4349, "霰": 4350, "露": 4351, "霸": 4352, "霹": 4353, "青": 4354, "靓": 4355, "靖": 4356, "静": 4357, "靛": 4358, "非": 4359, "靠": 4360, "靡": 4361, "面": 4362, "革": 4363, "靳": 4364, "靴": 4365, "靶": 4366, "靺": 4367, "靼": 4368, "鞅": 4369, "鞋": 4370, "鞍": 4371, "鞑": 4372, "鞘": 4373, "鞣": 4374, "鞨": 4375, "鞭": 4376, "韦": 4377, "韧": 4378, "韩": 4379, "韫": 4380, "韬": 4381, "韭": 4382, "音": 4383, "韵": 4384, "韶": 4385, "頴": 4386, "页": 4387, "顶": 4388, "顷": 4389, "项": 4390, "顺": 4391, "须": 4392, "顽": 4393, "顾": 4394, "顿": 4395, "颁": 4396, "颂": 4397, "预": 4398, "颅": 4399, "领": 4400, "颇": 4401, "颈": 4402, "颉": 4403, "颊": 4404, "颌": 4405, "颍": 4406, "颐": 4407, "频": 4408, "颖": 4409, "颗": 4410, "题": 4411, "颚": 4412, "颜": 4413, "额": 4414, "颞": 4415, "颠": 4416, "颤": 4417, "风": 4418, "飒": 4419, "飓": 4420, "飘": 4421, "飙": 4422, "飞": 4423, "食": 4424, "餐": 4425, "餵": 4426, "饥": 4427, "饪": 4428, "饬": 4429, "饭": 4430, "饮": 4431, "饯": 4432, "饰": 4433, "饱": 4434, "饲": 4435, "饴": 4436, "饵": 4437, "饶": 4438, "饷": 4439, "饼": 4440, "饿": 4441, "馀": 4442, "馅": 4443, "馆": 4444, "馈": 4445, "馔": 4446, "首": 4447, "香": 4448, "馥": 4449, "馨": 4450, "馯": 4451, "马": 4452, "驭": 4453, "驮": 4454, "驯": 4455, "驰": 4456, "驱": 4457, "驳": 
4458, "驴": 4459, "驶": 4460, "驷": 4461, "驸": 4462, "驹": 4463, "驻": 4464, "驼": 4465, "驾": 4466, "驿": 4467, "骁": 4468, "骂": 4469, "骄": 4470, "骆": 4471, "骈": 4472, "验": 4473, "骏": 4474, "骑": 4475, "骗": 4476, "骘": 4477, "骚": 4478, "骠": 4479, "骤": 4480, "骥": 4481, "骨": 4482, "骰": 4483, "骷": 4484, "骸": 4485, "骼": 4486, "髅": 4487, "髎": 4488, "髓": 4489, "高": 4490, "髻": 4491, "鬃": 4492, "鬣": 4493, "鬼": 4494, "魁": 4495, "魂": 4496, "魄": 4497, "魅": 4498, "魏": 4499, "魔": 4500, "鮎": 4501, "鱼": 4502, "鱿": 4503, "鲀": 4504, "鲁": 4505, "鲃": 4506, "鲇": 4507, "鲈": 4508, "鲍": 4509, "鲎": 4510, "鲑": 4511, "鲛": 4512, "鲜": 4513, "鲡": 4514, "鲢": 4515, "鲣": 4516, "鲤": 4517, "鲨": 4518, "鲫": 4519, "鲲": 4520, "鲳": 4521, "鲴": 4522, "鲷": 4523, "鲸": 4524, "鲹": 4525, "鲻": 4526, "鲼": 4527, "鲾": 4528, "鳃": 4529, "鳄": 4530, "鳅": 4531, "鳌": 4532, "鳍": 4533, "鳐": 4534, "鳔": 4535, "鳕": 4536, "鳖": 4537, "鳗": 4538, "鳞": 4539, "鳟": 4540, "鳢": 4541, "鸟": 4542, "鸠": 4543, "鸡": 4544, "鸢": 4545, "鸣": 4546, "鸥": 4547, "鸦": 4548, "鸩": 4549, "鸭": 4550, "鸮": 4551, "鸯": 4552, "鸱": 4553, "鸲": 4554, "鸵": 4555, "鸻": 4556, "鸽": 4557, "鸾": 4558, "鸿": 4559, "鹀": 4560, "鹃": 4561, "鹅": 4562, "鹉": 4563, "鹊": 4564, "鹋": 4565, "鹏": 4566, "鹑": 4567, "鹘": 4568, "鹤": 4569, "鹦": 4570, "鹩": 4571, "鹪": 4572, "鹫": 4573, "鹬": 4574, "鹭": 4575, "鹰": 4576, "鹳": 4577, "鹿": 4578, "麂": 4579, "麃": 4580, "麋": 4581, "麒": 4582, "麓": 4583, "麝": 4584, "麟": 4585, "麦": 4586, "麻": 4587, "麾": 4588, "黄": 4589, "黍": 4590, "黎": 4591, "黏": 4592, "黑": 4593, "黔": 4594, "默": 4595, "黛": 4596, "黜": 4597, "黧": 4598, "黯": 4599, "黻": 4600, "黼": 4601, "黾": 4602, "鼎": 4603, "鼐": 4604, "鼓": 4605, "鼠": 4606, "鼢": 4607, "鼩": 4608, "鼬": 4609, "鼱": 4610, "鼷": 4611, "鼹": 4612, "鼻": 4613, "齐": 4614, "齿": 4615, "龄": 4616, "龈": 4617, "龙": 4618, "龚": 4619, "龛": 4620, "龟": 4621, "龢": 4622, "ｐ": 4623, "|": 0, "[UNK]": 4624, "[PAD]": 4625}