Oztobuzz committed on
Commit
e269953
1 Parent(s): 198885d

Add new SentenceTransformer model.

Browse files
1_Pooling/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false
9
+ }
README.md ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: sentence-transformers
3
+ pipeline_tag: sentence-similarity
4
+ tags:
5
+ - sentence-transformers
6
+ - feature-extraction
7
+ - sentence-similarity
8
+ - transformers
9
+
10
+ ---
11
+
12
+ # Oztobuzz/Simcse_test_banking
13
+
14
+ This is a [sentence-transformers](https://www.SBERT.net) model: it maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for tasks like clustering or semantic search.
15
+
16
+ <!--- Describe your model here -->
17
+
18
+ ## Usage (Sentence-Transformers)
19
+
20
+ Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
21
+
22
+ ```
23
+ pip install -U sentence-transformers
24
+ ```
25
+
26
+ Then you can use the model like this:
27
+
28
+ ```python
29
+ from sentence_transformers import SentenceTransformer
30
+ sentences = ["This is an example sentence", "Each sentence is converted"]
31
+
32
+ model = SentenceTransformer('Oztobuzz/Simcse_test_banking')
33
+ embeddings = model.encode(sentences)
34
+ print(embeddings)
35
+ ```
36
+
37
+
38
+
39
+ ## Usage (HuggingFace Transformers)
40
+ Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: first, pass your input through the transformer model, then apply the right pooling operation on top of the contextualized word embeddings.
41
+
42
+ ```python
43
+ from transformers import AutoTokenizer, AutoModel
44
+ import torch
45
+
46
+
47
+ #Mean Pooling - Take attention mask into account for correct averaging
48
+ def mean_pooling(model_output, attention_mask):
49
+ token_embeddings = model_output[0] #First element of model_output contains all token embeddings
50
+ input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
51
+ return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
52
+
53
+
54
+ # Sentences we want sentence embeddings for
55
+ sentences = ['This is an example sentence', 'Each sentence is converted']
56
+
57
+ # Load model from HuggingFace Hub
58
+ tokenizer = AutoTokenizer.from_pretrained('Oztobuzz/Simcse_test_banking')
59
+ model = AutoModel.from_pretrained('Oztobuzz/Simcse_test_banking')
60
+
61
+ # Tokenize sentences
62
+ encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
63
+
64
+ # Compute token embeddings
65
+ with torch.no_grad():
66
+ model_output = model(**encoded_input)
67
+
68
+ # Perform pooling. In this case, mean pooling.
69
+ sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
70
+
71
+ print("Sentence embeddings:")
72
+ print(sentence_embeddings)
73
+ ```
74
+
75
+
76
+
77
+ ## Evaluation Results
78
+
79
+ <!--- Describe how your model was evaluated -->
80
+
81
+ For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name=Oztobuzz/Simcse_test_banking)
82
+
83
+
84
+ ## Training
85
+ The model was trained with the parameters:
86
+
87
+ **DataLoader**:
88
+
89
+ `torch.utils.data.dataloader.DataLoader` of length 102 with parameters:
90
+ ```
91
+ {'batch_size': 128, 'sampler': 'torch.utils.data.sampler.RandomSampler', 'batch_sampler': 'torch.utils.data.sampler.BatchSampler'}
92
+ ```
93
+
94
+ **Loss**:
95
+
96
+ `sentence_transformers.losses.MultipleNegativesRankingLoss.MultipleNegativesRankingLoss` with parameters:
97
+ ```
98
+ {'scale': 20.0, 'similarity_fct': 'cos_sim'}
99
+ ```
100
+
101
+ Parameters of the `fit()` method:
102
+ ```
103
+ {
104
+ "epochs": 1,
105
+ "evaluation_steps": 0,
106
+ "evaluator": "NoneType",
107
+ "max_grad_norm": 1,
108
+ "optimizer_class": "<class 'torch.optim.adamw.AdamW'>",
109
+ "optimizer_params": {
110
+ "lr": 5e-05
111
+ },
112
+ "scheduler": "WarmupLinear",
113
+ "steps_per_epoch": null,
114
+ "warmup_steps": 11,
115
+ "weight_decay": 0.01
116
+ }
117
+ ```
118
+
119
+
120
+ ## Full Model Architecture
121
+ ```
122
+ SentenceTransformer(
123
+ (0): Transformer({'max_seq_length': 32, 'do_lower_case': False}) with Transformer model: RobertaModel
124
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False})
125
+ )
126
+ ```
127
+
128
+ ## Citing & Authors
129
+
130
+ <!--- Describe where people can find more information -->
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Oztobuzz/my_testing_mlm_model",
3
+ "architectures": [
4
+ "RobertaModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 258,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "tokenizer_class": "PhobertTokenizer",
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.35.2",
25
+ "type_vocab_size": 1,
26
+ "use_cache": true,
27
+ "vocab_size": 64001
28
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.2.2",
4
+ "transformers": "4.35.2",
5
+ "pytorch": "2.1.0+cu121"
6
+ }
7
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07e6765740839d2bf6330d7758317881e72517eb8e6179c6c0c2694409fc9b1a
3
+ size 540015464
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 32,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "</s>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
@@ -0,0 +1,2975 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 32,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": "BatchLongest",
11
+ "direction": "Right",
12
+ "pad_to_multiple_of": null,
13
+ "pad_id": 1,
14
+ "pad_type_id": 0,
15
+ "pad_token": "</s>"
16
+ },
17
+ "added_tokens": [
18
+ {
19
+ "id": 0,
20
+ "content": "<s>",
21
+ "single_word": false,
22
+ "lstrip": false,
23
+ "rstrip": false,
24
+ "normalized": false,
25
+ "special": true
26
+ },
27
+ {
28
+ "id": 1,
29
+ "content": "</s>",
30
+ "single_word": false,
31
+ "lstrip": false,
32
+ "rstrip": false,
33
+ "normalized": false,
34
+ "special": true
35
+ },
36
+ {
37
+ "id": 2,
38
+ "content": "<unk>",
39
+ "single_word": false,
40
+ "lstrip": false,
41
+ "rstrip": false,
42
+ "normalized": false,
43
+ "special": true
44
+ },
45
+ {
46
+ "id": 3,
47
+ "content": "<pad>",
48
+ "single_word": false,
49
+ "lstrip": false,
50
+ "rstrip": false,
51
+ "normalized": false,
52
+ "special": true
53
+ },
54
+ {
55
+ "id": 4,
56
+ "content": "<mask>",
57
+ "single_word": false,
58
+ "lstrip": false,
59
+ "rstrip": false,
60
+ "normalized": false,
61
+ "special": true
62
+ }
63
+ ],
64
+ "normalizer": {
65
+ "type": "Sequence",
66
+ "normalizers": [
67
+ {
68
+ "type": "Lowercase"
69
+ }
70
+ ]
71
+ },
72
+ "pre_tokenizer": {
73
+ "type": "Whitespace"
74
+ },
75
+ "post_processor": {
76
+ "type": "TemplateProcessing",
77
+ "single": [
78
+ {
79
+ "SpecialToken": {
80
+ "id": "<s>",
81
+ "type_id": 0
82
+ }
83
+ },
84
+ {
85
+ "Sequence": {
86
+ "id": "A",
87
+ "type_id": 0
88
+ }
89
+ },
90
+ {
91
+ "SpecialToken": {
92
+ "id": "</s>",
93
+ "type_id": 0
94
+ }
95
+ }
96
+ ],
97
+ "pair": [
98
+ {
99
+ "SpecialToken": {
100
+ "id": "<s>",
101
+ "type_id": 0
102
+ }
103
+ },
104
+ {
105
+ "Sequence": {
106
+ "id": "A",
107
+ "type_id": 0
108
+ }
109
+ },
110
+ {
111
+ "SpecialToken": {
112
+ "id": "</s>",
113
+ "type_id": 0
114
+ }
115
+ },
116
+ {
117
+ "SpecialToken": {
118
+ "id": "</s>",
119
+ "type_id": 0
120
+ }
121
+ },
122
+ {
123
+ "Sequence": {
124
+ "id": "B",
125
+ "type_id": 0
126
+ }
127
+ },
128
+ {
129
+ "SpecialToken": {
130
+ "id": "</s>",
131
+ "type_id": 0
132
+ }
133
+ }
134
+ ],
135
+ "special_tokens": {
136
+ "</s>": {
137
+ "id": "</s>",
138
+ "ids": [
139
+ 1
140
+ ],
141
+ "tokens": [
142
+ "</s>"
143
+ ]
144
+ },
145
+ "<mask>": {
146
+ "id": "<mask>",
147
+ "ids": [
148
+ 4
149
+ ],
150
+ "tokens": [
151
+ "<mask>"
152
+ ]
153
+ },
154
+ "<pad>": {
155
+ "id": "<pad>",
156
+ "ids": [
157
+ 3
158
+ ],
159
+ "tokens": [
160
+ "<pad>"
161
+ ]
162
+ },
163
+ "<s>": {
164
+ "id": "<s>",
165
+ "ids": [
166
+ 0
167
+ ],
168
+ "tokens": [
169
+ "<s>"
170
+ ]
171
+ },
172
+ "<unk>": {
173
+ "id": "<unk>",
174
+ "ids": [
175
+ 2
176
+ ],
177
+ "tokens": [
178
+ "<unk>"
179
+ ]
180
+ }
181
+ }
182
+ },
183
+ "decoder": {
184
+ "type": "WordPiece",
185
+ "prefix": "##",
186
+ "cleanup": true
187
+ },
188
+ "model": {
189
+ "type": "WordPiece",
190
+ "unk_token": "<unk>",
191
+ "continuing_subword_prefix": "##",
192
+ "max_input_chars_per_word": 100,
193
+ "vocab": {
194
+ "<s>": 0,
195
+ "</s>": 1,
196
+ "<unk>": 2,
197
+ "<pad>": 3,
198
+ "<mask>": 4,
199
+ ",": 5,
200
+ "-": 6,
201
+ ".": 7,
202
+ "/": 8,
203
+ "0": 9,
204
+ "1": 10,
205
+ "2": 11,
206
+ "3": 12,
207
+ "4": 13,
208
+ "5": 14,
209
+ "6": 15,
210
+ "7": 16,
211
+ "8": 17,
212
+ "9": 18,
213
+ "?": 19,
214
+ "_": 20,
215
+ "a": 21,
216
+ "b": 22,
217
+ "c": 23,
218
+ "d": 24,
219
+ "e": 25,
220
+ "f": 26,
221
+ "g": 27,
222
+ "h": 28,
223
+ "i": 29,
224
+ "j": 30,
225
+ "k": 31,
226
+ "l": 32,
227
+ "m": 33,
228
+ "n": 34,
229
+ "o": 35,
230
+ "p": 36,
231
+ "q": 37,
232
+ "r": 38,
233
+ "s": 39,
234
+ "t": 40,
235
+ "u": 41,
236
+ "v": 42,
237
+ "w": 43,
238
+ "x": 44,
239
+ "y": 45,
240
+ "z": 46,
241
+ "à": 47,
242
+ "á": 48,
243
+ "â": 49,
244
+ "ã": 50,
245
+ "è": 51,
246
+ "é": 52,
247
+ "ê": 53,
248
+ "ì": 54,
249
+ "í": 55,
250
+ "ò": 56,
251
+ "ó": 57,
252
+ "ô": 58,
253
+ "ù": 59,
254
+ "ú": 60,
255
+ "ü": 61,
256
+ "ý": 62,
257
+ "ă": 63,
258
+ "đ": 64,
259
+ "ĩ": 65,
260
+ "ũ": 66,
261
+ "ơ": 67,
262
+ "ư": 68,
263
+ "ạ": 69,
264
+ "ả": 70,
265
+ "ấ": 71,
266
+ "ầ": 72,
267
+ "ẩ": 73,
268
+ "ẫ": 74,
269
+ "ậ": 75,
270
+ "ắ": 76,
271
+ "ằ": 77,
272
+ "ẳ": 78,
273
+ "ẵ": 79,
274
+ "ặ": 80,
275
+ "ẹ": 81,
276
+ "ẻ": 82,
277
+ "ẽ": 83,
278
+ "ế": 84,
279
+ "ề": 85,
280
+ "ể": 86,
281
+ "ệ": 87,
282
+ "ỉ": 88,
283
+ "ị": 89,
284
+ "ọ": 90,
285
+ "ỏ": 91,
286
+ "ố": 92,
287
+ "ồ": 93,
288
+ "ổ": 94,
289
+ "ỗ": 95,
290
+ "ộ": 96,
291
+ "ớ": 97,
292
+ "ờ": 98,
293
+ "ở": 99,
294
+ "ỡ": 100,
295
+ "ợ": 101,
296
+ "ụ": 102,
297
+ "ủ": 103,
298
+ "ứ": 104,
299
+ "ừ": 105,
300
+ "ử": 106,
301
+ "ữ": 107,
302
+ "ự": 108,
303
+ "ỳ": 109,
304
+ "##a": 110,
305
+ "##o": 111,
306
+ "##_": 112,
307
+ "##g": 113,
308
+ "##ồ": 114,
309
+ "##m": 115,
310
+ "##ạ": 116,
311
+ "##n": 117,
312
+ "##s": 118,
313
+ "##ơ": 119,
314
+ "##e": 120,
315
+ "##b": 121,
316
+ "##u": 122,
317
+ "##á": 123,
318
+ "##t": 124,
319
+ "##h": 125,
320
+ "##ả": 126,
321
+ "##i": 127,
322
+ "##r": 128,
323
+ "##c": 129,
324
+ "##ó": 130,
325
+ "##2": 131,
326
+ "##5": 132,
327
+ "##y": 133,
328
+ "##ớ": 134,
329
+ "##â": 135,
330
+ "##đ": 136,
331
+ "##ã": 137,
332
+ "##3": 138,
333
+ "##ị": 139,
334
+ "##0": 140,
335
+ "##k": 141,
336
+ "##9": 142,
337
+ "##l": 143,
338
+ "##ứ": 144,
339
+ "##ộ": 145,
340
+ "##ũ": 146,
341
+ "##ô": 147,
342
+ "##ự": 148,
343
+ "##ế": 149,
344
+ "##p": 150,
345
+ "##ố": 151,
346
+ "##ể": 152,
347
+ "##ê": 153,
348
+ "##v": 154,
349
+ "##ấ": 155,
350
+ "##f": 156,
351
+ "##ừ": 157,
352
+ "##7": 158,
353
+ "##d": 159,
354
+ "##6": 160,
355
+ "##q": 161,
356
+ "##ù": 162,
357
+ "##1": 163,
358
+ "##4": 164,
359
+ "##ỳ": 165,
360
+ "##à": 166,
361
+ "##ư": 167,
362
+ "##ệ": 168,
363
+ "##w": 169,
364
+ "##j": 170,
365
+ "##ỏ": 171,
366
+ "##ì": 172,
367
+ "##ẫ": 173,
368
+ "##ẵ": 174,
369
+ "##ỗ": 175,
370
+ "##ổ": 176,
371
+ "##z": 177,
372
+ "##x": 178,
373
+ "##ẻ": 179,
374
+ "##ầ": 180,
375
+ "##í": 181,
376
+ "##8": 182,
377
+ "##ọ": 183,
378
+ "##ắ": 184,
379
+ "##ú": 185,
380
+ "##ủ": 186,
381
+ "##ậ": 187,
382
+ "##è": 188,
383
+ "##ề": 189,
384
+ "##ờ": 190,
385
+ "##ằ": 191,
386
+ "##ử": 192,
387
+ "##ò": 193,
388
+ "##ẹ": 194,
389
+ "##ặ": 195,
390
+ "##ữ": 196,
391
+ "##ẽ": 197,
392
+ "##ỡ": 198,
393
+ "##ĩ": 199,
394
+ "##ă": 200,
395
+ "##ợ": 201,
396
+ "##ẩ": 202,
397
+ "##ẳ": 203,
398
+ "##ụ": 204,
399
+ "##é": 205,
400
+ "##ở": 206,
401
+ "##ỉ": 207,
402
+ "##ý": 208,
403
+ "##ü": 209,
404
+ "##ến": 210,
405
+ "##ng": 211,
406
+ "ch": 212,
407
+ "ba": 213,
408
+ "##uy": 214,
409
+ "đến": 215,
410
+ "bay": 216,
411
+ "##uyến": 217,
412
+ "chuyến": 218,
413
+ "##nh": 219,
414
+ "từ": 220,
415
+ "th": 221,
416
+ "##ào": 222,
417
+ "##à_": 223,
418
+ "##ôi": 224,
419
+ "tôi": 225,
420
+ "##ác": 226,
421
+ "các": 227,
422
+ "gi": 228,
423
+ "##à_n": 229,
424
+ "##_t": 230,
425
+ "##ph": 231,
426
+ "##ành": 232,
427
+ "##_c": 233,
428
+ "##_ph": 234,
429
+ "vào": 235,
430
+ "cho": 236,
431
+ "kh": 237,
432
+ "##ông": 238,
433
+ "##ất": 239,
434
+ "##_h": 240,
435
+ "có": 241,
436
+ "nh": 242,
437
+ "##ội": 243,
438
+ "hà_n": 244,
439
+ "hà_nội": 245,
440
+ "##_th": 246,
441
+ "thành": 247,
442
+ "##_phố": 248,
443
+ "thành_phố": 249,
444
+ "##ột": 250,
445
+ "##iế": 251,
446
+ "##inh": 252,
447
+ "##_m": 253,
448
+ "##uố": 254,
449
+ "##iết": 255,
450
+ "biết": 256,
451
+ "nào": 257,
452
+ "##_k": 258,
453
+ "thứ": 259,
454
+ "hạ": 260,
455
+ "##_b": 261,
456
+ "##iệ": 262,
457
+ "ng": 263,
458
+ "##áng": 264,
459
+ "##_ch": 265,
460
+ "một": 266,
461
+ "và": 267,
462
+ "ph": 268,
463
+ "tr": 269,
464
+ "##_l": 270,
465
+ "hu": 271,
466
+ "huế": 272,
467
+ "##ày": 273,
468
+ "ngày": 274,
469
+ "##ong": 275,
470
+ "bu": 276,
471
+ "củ": 277,
472
+ "của": 278,
473
+ "##et": 279,
474
+ "nhất": 280,
475
+ "##_minh": 281,
476
+ "hồ": 282,
477
+ "##í_minh": 283,
478
+ "##_chí_minh": 284,
479
+ "hồ_chí_minh": 285,
480
+ "muố": 286,
481
+ "muốn": 287,
482
+ "##ẵng": 288,
483
+ "đà_n": 289,
484
+ "đà_nẵng": 290,
485
+ "##ần": 291,
486
+ "##ất_c": 292,
487
+ "##anh": 293,
488
+ "vé": 294,
489
+ "giờ": 295,
490
+ "tất_c": 296,
491
+ "tất_cả": 297,
492
+ "##in": 298,
493
+ "hạ_l": 299,
494
+ "hạ_long": 300,
495
+ "không": 301,
496
+ "##iề": 302,
497
+ "cần": 303,
498
+ "giá": 304,
499
+ "##iều": 305,
500
+ "##iê": 306,
501
+ "##ữa": 307,
502
+ "##ay": 308,
503
+ "##am": 309,
504
+ "##àng": 310,
505
+ "hàng": 311,
506
+ "##ạt": 312,
507
+ "##ôn": 313,
508
+ "##au": 314,
509
+ "đà_": 315,
510
+ "##lạt": 316,
511
+ "đà_lạt": 317,
512
+ "##ởi": 318,
513
+ "##ai": 319,
514
+ "##iệt": 320,
515
+ "liệt": 321,
516
+ "##_kê": 322,
517
+ "liệt_kê": 323,
518
+ "khởi": 324,
519
+ "##_hành": 325,
520
+ "khởi_hành": 326,
521
+ "sáng": 327,
522
+ "hã": 328,
523
+ "##òng": 329,
524
+ "##_kh": 330,
525
+ "##_không": 331,
526
+ "đi": 332,
527
+ "hãng": 333,
528
+ "hàng_không": 334,
529
+ "tháng": 335,
530
+ "##_ho": 336,
531
+ "đư": 337,
532
+ "##_nh": 338,
533
+ "là": 339,
534
+ "giữa": 340,
535
+ "##ar": 341,
536
+ "##_bay": 342,
537
+ "chiều": 343,
538
+ "##uốc": 344,
539
+ "phú": 345,
540
+ "##ới": 346,
541
+ "##ững": 347,
542
+ "những": 348,
543
+ "##st": 349,
544
+ "##ổi": 350,
545
+ "buổi": 351,
546
+ "vinh": 352,
547
+ "##iện": 353,
548
+ "phú_": 354,
549
+ "##ải": 355,
550
+ "##quốc": 356,
551
+ "phú_quốc": 357,
552
+ "trư": 358,
553
+ "##_thu": 359,
554
+ "##a_thu": 360,
555
+ "##ời": 361,
556
+ "##_ma_thu": 362,
557
+ "buôn": 363,
558
+ "##_ma_thuột": 364,
559
+ "buôn_ma_thuột": 365,
560
+ "##_g": 366,
561
+ "##ir": 367,
562
+ "##_thơ": 368,
563
+ "cần_thơ": 369,
564
+ "hải": 370,
565
+ "bao": 371,
566
+ "air": 372,
567
+ "##an": 373,
568
+ "##ối": 374,
569
+ "##on": 375,
570
+ "##_phòng": 376,
571
+ "hải_phòng": 377,
572
+ "##ân": 378,
573
+ "jet": 379,
574
+ "##star": 380,
575
+ "jetstar": 381,
576
+ "tư": 382,
577
+ "vi": 383,
578
+ "##ừng": 384,
579
+ "##iêu": 385,
580
+ "##_nhiêu": 386,
581
+ "bao_nhiêu": 387,
582
+ "viet": 388,
583
+ "dừng": 389,
584
+ "##es": 390,
585
+ "hai": 391,
586
+ "##ồi": 392,
587
+ "##ng_t": 393,
588
+ "##_thông": 394,
589
+ "hạng": 395,
590
+ "##ờng": 396,
591
+ "rẻ": 397,
592
+ "sau": 398,
593
+ "đường": 399,
594
+ "##ươ": 400,
595
+ "thanh": 401,
596
+ "##ao": 402,
597
+ "##_hoá": 403,
598
+ "thanh_hoá": 404,
599
+ "##ch": 405,
600
+ "tối": 406,
601
+ "##_s": 407,
602
+ "##lin": 408,
603
+ "sân": 409,
604
+ "sân_bay": 410,
605
+ "##ứ_h": 411,
606
+ "khứ_h": 412,
607
+ "khứ_hồi": 413,
608
+ "cam": 414,
609
+ "##_r": 415,
610
+ "cam_r": 416,
611
+ "cam_ranh": 417,
612
+ "##iên": 418,
613
+ "##ại": 419,
614
+ "##_bộ": 420,
615
+ "đường_bộ": 421,
616
+ "có_th": 422,
617
+ "có_thể": 423,
618
+ "cà_": 424,
619
+ "##mau": 425,
620
+ "giao": 426,
621
+ "cà_mau": 427,
622
+ "giao_thông": 428,
623
+ "airlin": 429,
624
+ "airlines": 430,
625
+ "xe": 431,
626
+ "##_đ": 432,
627
+ "sớ": 433,
628
+ "sớm": 434,
629
+ "##ớc": 435,
630
+ "trước": 436,
631
+ "rời": 437,
632
+ "##nam": 438,
633
+ "##òn": 439,
634
+ "##ing": 440,
635
+ "##as": 441,
636
+ "##ơn": 442,
637
+ "##_gi": 443,
638
+ "vietnam": 444,
639
+ "##ăm": 445,
640
+ "##ảng": 446,
641
+ "##_v": 447,
642
+ "##_hoà": 448,
643
+ "năm": 449,
644
+ "phươ": 450,
645
+ "##ng_tiện": 451,
646
+ "phương_tiện": 452,
647
+ "bạ": 453,
648
+ "bạn": 454,
649
+ "về": 455,
650
+ "##ồng": 456,
651
+ "xem": 457,
652
+ "##or": 458,
653
+ "##_vụ": 459,
654
+ "dc": 460,
655
+ "côn": 461,
656
+ "tuy": 462,
657
+ "tuy_hoà": 463,
658
+ "đồng": 464,
659
+ "thông": 465,
660
+ "##_tin": 466,
661
+ "thông_tin": 467,
662
+ "##ợc": 468,
663
+ "được": 469,
664
+ "##ash": 470,
665
+ "quy": 471,
666
+ "tì": 472,
667
+ "nhơn": 473,
668
+ "tìm": 474,
669
+ "##ảo": 475,
670
+ "##ton": 476,
671
+ "wash": 477,
672
+ "washing": 478,
673
+ "##_đảo": 479,
674
+ "washington": 480,
675
+ "##ảy": 481,
676
+ "côn_đảo": 482,
677
+ "điện": 483,
678
+ "##ui": 484,
679
+ "##_phủ": 485,
680
+ "##_biên": 486,
681
+ "điện_biên": 487,
682
+ "điện_biên_phủ": 488,
683
+ "vui": 489,
684
+ "##_lòng": 490,
685
+ "vui_lòng": 491,
686
+ "##ên": 492,
687
+ "##ùng": 493,
688
+ "##al": 494,
689
+ "lo": 495,
690
+ "gì": 496,
691
+ "má": 497,
692
+ "##y_bay": 498,
693
+ "##ài": 499,
694
+ "máy_bay": 500,
695
+ "##ánh": 501,
696
+ "##ục": 502,
697
+ "##_chiều": 503,
698
+ "một_chiều": 504,
699
+ "##_bảy": 505,
700
+ "kho": 506,
701
+ "khoảng": 507,
702
+ "##le": 508,
703
+ "loại": 509,
704
+ "##áu": 510,
705
+ "##ách": 511,
706
+ "thứ_bảy": 512,
707
+ "##_hới": 513,
708
+ "ngh": 514,
709
+ "đồng_hới": 515,
710
+ "phục": 516,
711
+ "phục_vụ": 517,
712
+ "##ủ_nh": 518,
713
+ "##ật": 519,
714
+ "chủ_nh": 520,
715
+ "trưa": 521,
716
+ "chủ_nhật": 522,
717
+ "qu": 523,
718
+ "##ển": 524,
719
+ "##ik": 525,
720
+ "trên": 526,
721
+ "##ur": 527,
722
+ "ple": 528,
723
+ "##iku": 529,
724
+ "pleiku": 530,
725
+ "##er": 531,
726
+ "danh": 532,
727
+ "sài": 533,
728
+ "##_gòn": 534,
729
+ "##_sách": 535,
730
+ "danh_sách": 536,
731
+ "sài_gòn": 537,
732
+ "với": 538,
733
+ "##ểm": 539,
734
+ "mu": 540,
735
+ "điểm": 541,
736
+ "bữa": 542,
737
+ "##úp": 543,
738
+ "giúp": 544,
739
+ "trong": 545,
740
+ "##âm": 546,
741
+ "##_cánh": 547,
742
+ "hạ_cánh": 548,
743
+ "##ạch": 549,
744
+ "10": 550,
745
+ "thứ_s": 551,
746
+ "thứ_sáu": 552,
747
+ "12": 553,
748
+ "##_giá": 554,
749
+ "rạch": 555,
750
+ "tới": 556,
751
+ "##ĩa": 557,
752
+ "rạch_giá": 558,
753
+ "hi": 559,
754
+ "##_thị": 560,
755
+ "##ển_thị": 561,
756
+ "hiển_thị": 562,
757
+ "lú": 563,
758
+ "mã": 564,
759
+ "mùng": 565,
760
+ "nghĩa": 566,
761
+ "lúc": 567,
762
+ "còn": 568,
763
+ "##ộn": 569,
764
+ "muộn": 570,
765
+ "##jet": 571,
766
+ "vietjet": 572,
767
+ "ho": 573,
768
+ "xin": 574,
769
+ "##ịch": 575,
770
+ "##iệu": 576,
771
+ "mà": 577,
772
+ "##en": 578,
773
+ "##_n": 579,
774
+ "tru": 580,
775
+ "##bo": 581,
776
+ "##ng_tâm": 582,
777
+ "trung_tâm": 583,
778
+ "lai": 584,
779
+ "chu": 585,
780
+ "##ang": 586,
781
+ "##_d": 587,
782
+ "##ok": 588,
783
+ "##ấp": 589,
784
+ "##is": 590,
785
+ "##ụng": 591,
786
+ "##_dụng": 592,
787
+ "20": 593,
788
+ "sử": 594,
789
+ "##ặc": 595,
790
+ "hoặc": 596,
791
+ "sử_dụng": 597,
792
+ "##ười": 598,
793
+ "##el": 599,
794
+ "##mbo": 600,
795
+ "##il": 601,
796
+ "để": 602,
797
+ "##ia": 603,
798
+ "ăn": 604,
799
+ "chỗ": 605,
800
+ "bambo": 606,
801
+ "bamboo": 607,
802
+ "vn": 608,
803
+ "##ặt": 609,
804
+ "mười": 610,
805
+ "##ic": 611,
806
+ "nối": 612,
807
+ "##ou": 613,
808
+ "số": 614,
809
+ "##way": 615,
810
+ "##ình": 616,
811
+ "sẽ": 617,
812
+ "đặt": 618,
813
+ "##at": 619,
814
+ "airway": 620,
815
+ "airways": 621,
816
+ "quảng": 622,
817
+ "tại": 623,
818
+ "##âu": 624,
819
+ "##êm": 625,
820
+ "11": 626,
821
+ "triệu": 627,
822
+ "##if": 628,
823
+ "##_tr": 629,
824
+ "##ad": 630,
825
+ "##_là": 631,
826
+ "lu": 632,
827
+ "##ầu": 633,
828
+ "##ng_c": 634,
829
+ "##ap": 635,
830
+ "dịch": 636,
831
+ "quốc": 637,
832
+ "##co": 638,
833
+ "##_tế": 639,
834
+ "##_mai": 640,
835
+ "trở": 641,
836
+ "ngày_mai": 642,
837
+ "nghĩa_là": 643,
838
+ "vna": 644,
839
+ "dịch_vụ": 645,
840
+ "quốc_tế": 646,
841
+ "dư": 647,
842
+ "##ơi": 648,
843
+ "dưới": 649,
844
+ "cu": 650,
845
+ "đầu": 651,
846
+ "##yo": 652,
847
+ "##_trình": 653,
848
+ "##ng_cấp": 654,
849
+ "cung_cấp": 655,
850
+ "lịch": 656,
851
+ "##ạm": 657,
852
+ "##i_c": 658,
853
+ "##ắt": 659,
854
+ "đêm": 660,
855
+ "đang": 661,
856
+ "##to": 662,
857
+ "##pur": 663,
858
+ "chi": 664,
859
+ "##_mươ": 665,
860
+ "##ala": 666,
861
+ "lịch_trình": 667,
862
+ "##_mươi": 668,
863
+ "tok": 669,
864
+ "##di": 670,
865
+ "##ổ_thông": 671,
866
+ "##_kỳ": 672,
867
+ "phổ_thông": 673,
868
+ "tokyo": 674,
869
+ "bo": 675,
870
+ "ku": 676,
871
+ "##mpur": 677,
872
+ "##ây": 678,
873
+ "lumpur": 679,
874
+ "kuala": 680,
875
+ "cuố": 681,
876
+ "ne": 682,
877
+ "san": 683,
878
+ "##lan": 684,
879
+ "hai_mươi": 685,
880
+ "##ắc": 686,
881
+ "thời": 687,
882
+ "##_hiệu": 688,
883
+ "##_gian": 689,
884
+ "thời_gian": 690,
885
+ "kinh": 691,
886
+ "##ne": 692,
887
+ "##ực": 693,
888
+ "số_hiệu": 694,
889
+ "mal": 695,
890
+ "##ol": 696,
891
+ "##kok": 697,
892
+ "##ves": 698,
893
+ "##ngkok": 699,
894
+ "bangkok": 700,
895
+ "thấp": 701,
896
+ "##_tiên": 702,
897
+ "đầu_tiên": 703,
898
+ "##dives": 704,
899
+ "maldives": 705,
900
+ "am": 706,
901
+ "di": 707,
902
+ "##th": 708,
903
+ "qua": 709,
904
+ "sáu": 710,
905
+ "tu": 711,
906
+ "đến_n": 712,
907
+ "##i_cùng": 713,
908
+ "cuối_cùng": 714,
909
+ "new": 715,
910
+ "tuần": 716,
911
+ "đến_nơi": 717,
912
+ "bất": 718,
913
+ "man": 719,
914
+ "rồi": 720,
915
+ "##om": 721,
916
+ "chiế": 722,
917
+ "phút": 723,
918
+ "##_giờ": 724,
919
+ "nghỉ": 725,
920
+ "bất_kỳ": 726,
921
+ "chiếc": 727,
922
+ "##ak": 728,
923
+ "##ver": 729,
924
+ "##_phí": 730,
925
+ "trạm": 731,
926
+ "##ster": 732,
927
+ "##urg": 733,
928
+ "chi_phí": 734,
929
+ "bây": 735,
930
+ "par": 736,
931
+ "##ran": 737,
932
+ "##ỏi": 738,
933
+ "chỉ": 739,
934
+ "bây_giờ": 740,
935
+ "du": 741,
936
+ "sẵ": 742,
937
+ "tor": 743,
938
+ "##go": 744,
939
+ "##sin": 745,
940
+ "##im": 746,
941
+ "##lo": 747,
942
+ "##uyển": 748,
943
+ "khỏi": 749,
944
+ "paris": 750,
945
+ "sẵn": 751,
946
+ "fran": 752,
947
+ "se": 753,
948
+ "##burg": 754,
949
+ "##ức": 755,
950
+ "##ương": 756,
951
+ "##ore": 757,
952
+ "27": 758,
953
+ "bắc": 759,
954
+ "##n_ch": 760,
955
+ "##bu": 761,
956
+ "chứ": 762,
957
+ "thu": 763,
958
+ "khác": 764,
959
+ "##ila": 765,
960
+ "manila": 766,
961
+ "##ac": 767,
962
+ "##ha": 768,
963
+ "##dam": 769,
964
+ "202": 770,
965
+ "chứa": 771,
966
+ "thuê": 772,
967
+ "ap": 773,
968
+ "cal": 774,
969
+ "hô": 775,
970
+ "lon": 776,
971
+ "đó": 777,
972
+ "##os": 778,
973
+ "##nia": 779,
974
+ "##ta": 780,
975
+ "##ống": 781,
976
+ "##don": 782,
977
+ "##ornia": 783,
978
+ "##ifornia": 784,
979
+ "california": 785,
980
+ "hôm": 786,
981
+ "london": 787,
982
+ "mai": 788,
983
+ "pac": 789,
984
+ "tân": 790,
985
+ "##ich": 791,
986
+ "##ượ": 792,
987
+ "##ăn": 793,
988
+ "hạn_ch": 794,
989
+ "trống": 795,
990
+ "đường_bay": 796,
991
+ "##_ninh": 797,
992
+ "##oul": 798,
993
+ "##ific": 799,
994
+ "pacific": 800,
995
+ "hạn_chế": 801,
996
+ "dl": 802,
997
+ "lại": 803,
998
+ "sing": 804,
999
+ "tắt": 805,
1000
+ "##_x": 806,
1001
+ "##tt": 807,
1002
+ "##iếp": 808,
1003
+ "##_chuyển": 809,
1004
+ "##onto": 810,
1005
+ "quảng_ninh": 811,
1006
+ "##apore": 812,
1007
+ "dub": 813,
1008
+ "toronto": 814,
1009
+ "singapore": 815,
1010
+ "15": 816,
1011
+ "bài": 817,
1012
+ "hong": 818,
1013
+ "ok": 819,
1014
+ "rom": 820,
1015
+ "sức": 821,
1016
+ "##kong": 822,
1017
+ "cách": 823,
1018
+ "amster": 824,
1019
+ "di_chuyển": 825,
1020
+ "franc": 826,
1021
+ "seoul": 827,
1022
+ "hongkong": 828,
1023
+ "amsterdam": 829,
1024
+ "28": 830,
1025
+ "den": 831,
1026
+ "liên": 832,
1027
+ "mos": 833,
1028
+ "viết": 834,
1029
+ "##san": 835,
1030
+ "##ám": 836,
1031
+ "##iêng": 837,
1032
+ "hãy": 838,
1033
+ "##ston": 839,
1034
+ "##cow": 840,
1035
+ "##ượng": 841,
1036
+ "denver": 842,
1037
+ "moscow": 843,
1038
+ "30": 844,
1039
+ "lim": 845,
1040
+ "vj": 846,
1041
+ "viêng": 847,
1042
+ "##las": 848,
1043
+ "##ấy": 849,
1044
+ "chăn": 850,
1045
+ "##isco": 851,
1046
+ "francisco": 852,
1047
+ "17": 853,
1048
+ "do": 854,
1049
+ "dal": 855,
1050
+ "##ưỡ": 856,
1051
+ "dallas": 857,
1052
+ "cất_c": 858,
1053
+ "mất": 859,
1054
+ "mấy": 860,
1055
+ "##id": 861,
1056
+ "##ng_": 862,
1057
+ "chân": 863,
1058
+ "thương": 864,
1059
+ "##_tiếp": 865,
1060
+ "busan": 866,
1061
+ "tươ": 867,
1062
+ "##_gia": 868,
1063
+ "##ork": 869,
1064
+ "##ousin": 870,
1065
+ "##land": 871,
1066
+ "limousin": 872,
1067
+ "doha": 873,
1068
+ "cất_cánh": 874,
1069
+ "thương_gia": 875,
1070
+ "limousine": 876,
1071
+ "21": 877,
1072
+ "bảy": 878,
1073
+ "la": 879,
1074
+ "tám": 880,
1075
+ "vân": 881,
1076
+ "##ago": 882,
1077
+ "##ồn": 883,
1078
+ "##us": 884,
1079
+ "##ck": 885,
1080
+ "##ứng": 886,
1081
+ "thì": 887,
1082
+ "khánh": 888,
1083
+ "##_lại": 889,
1084
+ "##_đồn": 890,
1085
+ "##urne": 891,
1086
+ "##_nay": 892,
1087
+ "boston": 893,
1088
+ "hôm_nay": 894,
1089
+ "##ng_ứng": 895,
1090
+ "tương_ứng": 896,
1091
+ "vân_đồn": 897,
1092
+ "khánh_hoà": 898,
1093
+ "at": 899,
1094
+ "cá": 900,
1095
+ "cả": 901,
1096
+ "cùng": 902,
1097
+ "mel": 903,
1098
+ "này": 904,
1099
+ "rưỡ": 905,
1100
+ "york": 906,
1101
+ "##eing": 907,
1102
+ "##uk": 908,
1103
+ "##ằng": 909,
1104
+ "thượng": 910,
1105
+ "người": 911,
1106
+ "##orth": 912,
1107
+ "##bourne": 913,
1108
+ "boeing": 914,
1109
+ "2020": 915,
1110
+ "melbourne": 916,
1111
+ "rưỡi": 917,
1112
+ "hơn": 918,
1113
+ "nội": 919,
1114
+ "##sburg": 920,
1115
+ "##ích": 921,
1116
+ "châu": 922,
1117
+ "phù": 923,
1118
+ "trực": 924,
1119
+ "##_sơn": 925,
1120
+ "##att": 926,
1121
+ "##atar": 927,
1122
+ "trực_tiếp": 928,
1123
+ "a3": 929,
1124
+ "bằng": 930,
1125
+ "tiề": 931,
1126
+ "thế": 932,
1127
+ "##_tục": 933,
1128
+ "##phia": 934,
1129
+ "##_châu": 935,
1130
+ "phải": 936,
1131
+ "phil": 937,
1132
+ "làm": 938,
1133
+ "##elphia": 939,
1134
+ "##adelphia": 940,
1135
+ "tiền": 941,
1136
+ "philadelphia": 942,
1137
+ "aar": 943,
1138
+ "bởi": 944,
1139
+ "cũ": 945,
1140
+ "mil": 946,
1141
+ "os": 947,
1142
+ "sao": 948,
1143
+ "zur": 949,
1144
+ "##_nam": 950,
1145
+ "##enh": 951,
1146
+ "##hus": 952,
1147
+ "##it": 953,
1148
+ "##ắn": 954,
1149
+ "thành_đ": 955,
1150
+ "phuk": 956,
1151
+ "##_giang": 957,
1152
+ "quay": 958,
1153
+ "tân_sơn": 959,
1154
+ "rome": 960,
1155
+ "liên_tục": 961,
1156
+ "aarhus": 962,
1157
+ "cũng": 963,
1158
+ "zurich": 964,
1159
+ "thành_đô": 965,
1160
+ "phuket": 966,
1161
+ "ist": 967,
1162
+ "kiên": 968,
1163
+ "qatar": 969,
1164
+ "sắ": 970,
1165
+ "tam": 971,
1166
+ "đắt": 972,
1167
+ "##do": 973,
1168
+ "như": 974,
1169
+ "ngắn": 975,
1170
+ "##anbu": 976,
1171
+ "##lanta": 977,
1172
+ "atlanta": 978,
1173
+ "a32": 979,
1174
+ "istanbu": 980,
1175
+ "kiên_giang": 981,
1176
+ "sắp": 982,
1177
+ "istanbul": 983,
1178
+ "16": 984,
1179
+ "26": 985,
1180
+ "fl": 986,
1181
+ "gh": 987,
1182
+ "sy": 988,
1183
+ "tên": 989,
1184
+ "tạm": 990,
1185
+ "##gu": 991,
1186
+ "##gen": 992,
1187
+ "##ex": 993,
1188
+ "##ego": 994,
1189
+ "##dne": 995,
1190
+ "##ẳng": 996,
1191
+ "chào": 997,
1192
+ "thẳng": 998,
1193
+ "giải": 999,
1194
+ "##_tâm": 1000,
1195
+ "##_thích": 1001,
1196
+ "ngồi": 1002,
1197
+ "bao_x": 1003,
1198
+ "##an_tâm": 1004,
1199
+ "##ona": 1005,
1200
+ "##ort": 1006,
1201
+ "quan_tâm": 1007,
1202
+ "diego": 1008,
1203
+ "dubai": 1009,
1204
+ "flor": 1010,
1205
+ "ghế": 1011,
1206
+ "sydne": 1012,
1207
+ "giải_thích": 1013,
1208
+ "bao_xa": 1014,
1209
+ "sydney": 1015,
1210
+ "19": 1016,
1211
+ "31": 1017,
1212
+ "ang": 1018,
1213
+ "ber": 1019,
1214
+ "qx": 1020,
1215
+ "đâu": 1021,
1216
+ "##av": 1022,
1217
+ "##aw": 1023,
1218
+ "##na": 1024,
1219
+ "##est": 1025,
1220
+ "##30": 1026,
1221
+ "##ợp": 1027,
1222
+ "thực": 1028,
1223
+ "khi": 1029,
1224
+ "##_hiện": 1030,
1225
+ "##_hợp": 1031,
1226
+ "nhiều": 1032,
1227
+ "##_ba": 1033,
1228
+ "los": 1034,
1229
+ "##_nào": 1035,
1230
+ "##eles": 1036,
1231
+ "quảng_nam": 1037,
1232
+ "dli": 1038,
1233
+ "milan": 1039,
1234
+ "angeles": 1040,
1235
+ "thực_hiện": 1041,
1236
+ "23": 1042,
1237
+ "57": 1043,
1238
+ "co": 1044,
1239
+ "gần": 1045,
1240
+ "hành": 1046,
1241
+ "in": 1047,
1242
+ "patt": 1048,
1243
+ "##mb": 1049,
1244
+ "##io": 1050,
1245
+ "char": 1051,
1246
+ "##_cách": 1052,
1247
+ "##_hai": 1053,
1248
+ "##_hơi": 1054,
1249
+ "thứ_ba": 1055,
1250
+ "phi_c": 1056,
1251
+ "##aya": 1057,
1252
+ "##ana": 1058,
1253
+ "##_số": 1059,
1254
+ "xe_hơi": 1060,
1255
+ "khoảng_cách": 1061,
1256
+ "sống": 1062,
1257
+ "chiang": 1063,
1258
+ "dublin": 1064,
1259
+ "phù_hợp": 1065,
1260
+ "pattaya": 1066,
1261
+ "phi_cơ": 1067,
1262
+ "45": 1068,
1263
+ "73": 1069,
1264
+ "au": 1070,
1265
+ "da": 1071,
1266
+ "del": 1072,
1267
+ "nam": 1073,
1268
+ "qh": 1074,
1269
+ "##_ơn": 1075,
1270
+ "##m_ơn": 1076,
1271
+ "##ed": 1077,
1272
+ "##át": 1078,
1273
+ "##h30": 1079,
1274
+ "##ix": 1080,
1275
+ "##ếp": 1081,
1276
+ "##ốt": 1082,
1277
+ "nhĩ": 1083,
1278
+ "##ont": 1084,
1279
+ "##che": 1085,
1280
+ "##_đêm": 1086,
1281
+ "##eric": 1087,
1282
+ "##apol": 1088,
1283
+ "trở_lại": 1089,
1284
+ "americ": 1090,
1285
+ "##_xếp": 1091,
1286
+ "##ida": 1092,
1287
+ "##ckland": 1093,
1288
+ "cáp": 1094,
1289
+ "cảm_ơn": 1095,
1290
+ "sắp_xếp": 1096,
1291
+ "florida": 1097,
1292
+ "auckland": 1098,
1293
+ "ar": 1099,
1294
+ "col": 1100,
1295
+ "dad": 1101,
1296
+ "fort": 1102,
1297
+ "hp": 1103,
1298
+ "hav": 1104,
1299
+ "je": 1105,
1300
+ "lis": 1106,
1301
+ "nó": 1107,
1302
+ "north": 1108,
1303
+ "pet": 1109,
1304
+ "pen": 1110,
1305
+ "quyến": 1111,
1306
+ "ta": 1112,
1307
+ "tur": 1113,
1308
+ "worth": 1114,
1309
+ "đài": 1115,
1310
+ "##of": 1116,
1311
+ "##oen": 1117,
1312
+ "##em": 1118,
1313
+ "##ev": 1119,
1314
+ "##bon": 1120,
1315
+ "##ra": 1121,
1316
+ "##57": 1122,
1317
+ "##00": 1123,
1318
+ "##ju": 1124,
1319
+ "##ọn": 1125,
1320
+ "##úng": 1126,
1321
+ "##ậy": 1127,
1322
+ "thâm": 1128,
1323
+ "phoen": 1129,
1324
+ "##ino": 1130,
1325
+ "##_đị": 1131,
1326
+ "quá": 1132,
1327
+ "##ersburg": 1133,
1328
+ "mumb": 1134,
1329
+ "quảng_châu": 1135,
1330
+ "hai_mươi_bảy": 1136,
1331
+ "##lott": 1137,
1332
+ "##bus": 1138,
1333
+ "2021": 1139,
1334
+ "roma": 1140,
1335
+ "tam_kỳ": 1141,
1336
+ "a321": 1142,
1337
+ "charlott": 1143,
1338
+ "american": 1144,
1339
+ "havana": 1145,
1340
+ "jeju": 1146,
1341
+ "lisbon": 1147,
1342
+ "petersburg": 1148,
1343
+ "phoenix": 1149,
1344
+ "mumbai": 1150,
1345
+ "charlotte": 1151,
1346
+ "bắt": 1152,
1347
+ "hel": 1153,
1348
+ "lần": 1154,
1349
+ "mồng": 1155,
1350
+ "oak": 1156,
1351
+ "pi": 1157,
1352
+ "pal": 1158,
1353
+ "pra": 1159,
1354
+ "riệ": 1160,
1355
+ "sa": 1161,
1356
+ "st": 1162,
1357
+ "sto": 1163,
1358
+ "tây": 1164,
1359
+ "van": 1165,
1360
+ "xiê": 1166,
1361
+ "##ott": 1167,
1362
+ "##mo": 1168,
1363
+ "##nt": 1169,
1364
+ "##egu": 1170,
1365
+ "##án": 1171,
1366
+ "##ham": 1172,
1367
+ "##hol": 1173,
1368
+ "##iz": 1174,
1369
+ "##ro": 1175,
1370
+ "##ca": 1176,
1371
+ "##ki": 1177,
1372
+ "##ỗi": 1178,
1373
+ "##80": 1179,
1374
+ "chic": 1180,
1375
+ "##uyên": 1181,
1376
+ "khương": 1182,
1377
+ "nhỏ": 1183,
1378
+ "##_bắc": 1184,
1379
+ "một_số": 1185,
1380
+ "##_lâu": 1186,
1381
+ "bao_lâu": 1187,
1382
+ "##ania": 1188,
1383
+ "xem_lại": 1189,
1384
+ "##ermo": 1190,
1385
+ "##ice": 1191,
1386
+ "##then": 1192,
1387
+ "##aka": 1193,
1388
+ "##sinki": 1194,
1389
+ "##ttsburg": 1195,
1390
+ "##ckhol": 1196,
1391
+ "osaka": 1197,
1392
+ "daegu": 1198,
1393
+ "ariz": 1199,
1394
+ "đài_bắc": 1200,
1395
+ "helsinki": 1201,
1396
+ "pittsburg": 1202,
1397
+ "palermo": 1203,
1398
+ "riệp": 1204,
1399
+ "stockhol": 1205,
1400
+ "xiêm": 1206,
1401
+ "chicago": 1207,
1402
+ "arizona": 1208,
1403
+ "pittsburgh": 1209,
1404
+ "stockholm": 1210,
1405
+ "14": 1211,
1406
+ "7h": 1212,
1407
+ "72": 1213,
1408
+ "aer": 1214,
1409
+ "be": 1215,
1410
+ "br": 1216,
1411
+ "bình": 1217,
1412
+ "ed": 1218,
1413
+ "ew": 1219,
1414
+ "hiện": 1220,
1415
+ "ké": 1221,
1416
+ "mức": 1222,
1417
+ "mex": 1223,
1418
+ "mont": 1224,
1419
+ "mỗi": 1225,
1420
+ "nằ": 1226,
1421
+ "nử": 1227,
1422
+ "nay": 1228,
1423
+ "nor": 1229,
1424
+ "ont": 1230,
1425
+ "penh": 1231,
1426
+ "qo": 1232,
1427
+ "tp": 1233,
1428
+ "vũ": 1234,
1429
+ "yn": 1235,
1430
+ "ít": 1236,
1431
+ "##agen": 1237,
1432
+ "##a_đêm": 1238,
1433
+ "##nom": 1239,
1434
+ "##sel": 1240,
1435
+ "##eal": 1241,
1436
+ "##ảnh": 1242,
1437
+ "##real": 1243,
1438
+ "##cou": 1244,
1439
+ "##yên": 1245,
1440
+ "##kf": 1246,
1441
+ "##les": 1247,
1442
+ "##penh": 1248,
1443
+ "##ín": 1249,
1444
+ "chín": 1250,
1445
+ "##_cảnh": 1251,
1446
+ "nhau": 1252,
1447
+ "phnom": 1253,
1448
+ "##_lăm": 1254,
1449
+ "##inc": 1255,
1450
+ "##inburg": 1256,
1451
+ "##_khách": 1257,
1452
+ "đưa": 1258,
1453
+ "##ars": 1259,
1454
+ "##ark": 1260,
1455
+ "##ario": 1261,
1456
+ "phú_yên": 1262,
1457
+ "airbus": 1263,
1458
+ "##ingham": 1264,
1459
+ "##urt": 1265,
1460
+ "mua": 1266,
1461
+ "mười_lăm": 1267,
1462
+ "##ico": 1268,
1463
+ "##outh": 1269,
1464
+ "hai_mươi_hai": 1270,
1465
+ "torino": 1271,
1466
+ "##lot": 1272,
1467
+ "frankf": 1273,
1468
+ "281": 1274,
1469
+ "cát": 1275,
1470
+ "thế_nào": 1276,
1471
+ "oslo": 1277,
1472
+ "nhưng": 1278,
1473
+ "##awa": 1279,
1474
+ "copenh": 1280,
1475
+ "hành_khách": 1281,
1476
+ "##chester": 1282,
1477
+ "##apolis": 1283,
1478
+ "quá_cảnh": 1284,
1479
+ "oakland": 1285,
1480
+ "vancou": 1286,
1481
+ "edinburg": 1287,
1482
+ "ewr": 1288,
1483
+ "kém": 1289,
1484
+ "mexico": 1290,
1485
+ "montreal": 1291,
1486
+ "nằm": 1292,
1487
+ "nửa_đêm": 1293,
1488
+ "ontario": 1294,
1489
+ "frankfurt": 1295,
1490
+ "copenhagen": 1296,
1491
+ "vancouver": 1297,
1492
+ "edinburgh": 1298,
1493
+ "13": 1299,
1494
+ "22": 1300,
1495
+ "29": 1301,
1496
+ "34": 1302,
1497
+ "78": 1303,
1498
+ "athen": 1304,
1499
+ "càng": 1305,
1500
+ "car": 1306,
1501
+ "cat": 1307,
1502
+ "ham": 1308,
1503
+ "hán": 1309,
1504
+ "jak": 1310,
1505
+ "ly": 1311,
1506
+ "ma": 1312,
1507
+ "minh": 1313,
1508
+ "nế": 1314,
1509
+ "ninh": 1315,
1510
+ "or": 1316,
1511
+ "por": 1317,
1512
+ "qw": 1318,
1513
+ "sal": 1319,
1514
+ "tex": 1320,
1515
+ "us": 1321,
1516
+ "vdo": 1322,
1517
+ "xuyên": 1323,
1518
+ "##_ng": 1324,
1519
+ "##ồm": 1325,
1520
+ "##se": 1326,
1521
+ "##bi": 1327,
1522
+ "##u_v": 1328,
1523
+ "##hi": 1329,
1524
+ "##rid": 1330,
1525
+ "##ya": 1331,
1526
+ "##po": 1332,
1527
+ "##ốn": 1333,
1528
+ "##wich": 1334,
1529
+ "##west": 1335,
1530
+ "chúng": 1336,
1531
+ "bal": 1337,
1532
+ "##_cát": 1338,
1533
+ "khu_v": 1339,
1534
+ "##_nhất": 1340,
1535
+ "##arta": 1341,
1536
+ "##_gồm": 1342,
1537
+ "bao_gồm": 1343,
1538
+ "##ans": 1344,
1539
+ "##orad": 1345,
1540
+ "côn_minh": 1346,
1541
+ "mun": 1347,
1542
+ "newark": 1348,
1543
+ "manchester": 1349,
1544
+ "##ake": 1350,
1545
+ "seatt": 1351,
1546
+ "phù_cát": 1352,
1547
+ "bergen": 1353,
1548
+ "delhi": 1354,
1549
+ "colorad": 1355,
1550
+ "penang": 1356,
1551
+ "##oflot": 1357,
1552
+ "aeroflot": 1358,
1553
+ "norwich": 1359,
1554
+ "ít_nhất": 1360,
1555
+ "787": 1361,
1556
+ "jakarta": 1362,
1557
+ "lyon": 1363,
1558
+ "nếu": 1364,
1559
+ "porto": 1365,
1560
+ "salt": 1366,
1561
+ "texas": 1367,
1562
+ "khu_vực": 1368,
1563
+ "seattle": 1369,
1564
+ "colorado": 1370,
1565
+ "18": 1371,
1566
+ "25": 1372,
1567
+ "24": 1373,
1568
+ "82": 1374,
1569
+ "an": 1375,
1570
+ "abu": 1376,
1571
+ "bi": 1377,
1572
+ "bốn": 1378,
1573
+ "cx": 1379,
1574
+ "can": 1380,
1575
+ "cit": 1381,
1576
+ "cinc": 1382,
1577
+ "dha": 1383,
1578
+ "ea": 1384,
1579
+ "ff": 1385,
1580
+ "hph": 1386,
1581
+ "han": 1387,
1582
+ "lự": 1388,
1583
+ "lake": 1389,
1584
+ "mi": 1390,
1585
+ "min": 1391,
1586
+ "mad": 1392,
1587
+ "mốt": 1393,
1588
+ "m80": 1394,
1589
+ "nago": 1395,
1590
+ "nice": 1396,
1591
+ "pis": 1397,
1592
+ "sg": 1398,
1593
+ "sen": 1399,
1594
+ "tiếp": 1400,
1595
+ "tốt": 1401,
1596
+ "ua": 1402,
1597
+ "un": 1403,
1598
+ "ven": 1404,
1599
+ "vậy": 1405,
1600
+ "wars": 1406,
1601
+ "xu": 1407,
1602
+ "##a_ch": 1408,
1603
+ "##ow": 1409,
1604
+ "##olin": 1410,
1605
+ "##gan": 1411,
1606
+ "##gia": 1412,
1607
+ "##gna": 1413,
1608
+ "##ma": 1414,
1609
+ "##mouth": 1415,
1610
+ "##sh": 1416,
1611
+ "##eo": 1417,
1612
+ "##tor": 1418,
1613
+ "##tim": 1419,
1614
+ "##ản": 1420,
1615
+ "##24": 1421,
1616
+ "##kk": 1422,
1617
+ "##vil": 1423,
1618
+ "##77": 1424,
1619
+ "##dai": 1425,
1620
+ "##qu": 1426,
1621
+ "##xi": 1427,
1622
+ "chở": 1428,
1623
+ "##_bình": 1429,
1624
+ "ngừng": 1430,
1625
+ "trùng": 1431,
1626
+ "##ini": 1432,
1627
+ "##aido": 1433,
1628
+ "##_khánh": 1434,
1629
+ "##ess": 1435,
1630
+ "hou": 1436,
1631
+ "hokk": 1437,
1632
+ "##ati": 1438,
1633
+ "##ada": 1439,
1634
+ "bolo": 1440,
1635
+ "cuối": 1441,
1636
+ "ap57": 1442,
1637
+ "##ited": 1443,
1638
+ "a320": 1444,
1639
+ "##gue": 1445,
1640
+ "indi": 1446,
1641
+ "737": 1447,
1642
+ "taxi": 1448,
1643
+ "turin": 1449,
1644
+ "##_định": 1450,
1645
+ "prague": 1451,
1646
+ "athens": 1452,
1647
+ "carolin": 1453,
1648
+ "catania": 1454,
1649
+ "hamburg": 1455,
1650
+ "##pool": 1456,
1651
+ "baltim": 1457,
1652
+ "city": 1458,
1653
+ "cincin": 1459,
1654
+ "dhabi": 1460,
1655
+ "lựa_ch": 1461,
1656
+ "madrid": 1462,
1657
+ "nagoya": 1463,
1658
+ "pisa": 1464,
1659
+ "sgn": 1465,
1660
+ "sendai": 1466,
1661
+ "united": 1467,
1662
+ "trùng_khánh": 1468,
1663
+ "hokkaido": 1469,
1664
+ "bologna": 1470,
1665
+ "carolina": 1471,
1666
+ "baltimore": 1472,
1667
+ "lựa_chọn": 1473,
1668
+ "500": 1474,
1669
+ "as": 1475,
1670
+ "al": 1476,
1671
+ "bất_c": 1477,
1672
+ "bir": 1478,
1673
+ "cố": 1479,
1674
+ "cao": 1480,
1675
+ "d9": 1481,
1676
+ "dil": 1482,
1677
+ "fir": 1483,
1678
+ "ge": 1484,
1679
+ "hộ": 1485,
1680
+ "hay": 1486,
1681
+ "hat": 1487,
1682
+ "ki": 1488,
1683
+ "kil": 1489,
1684
+ "kyo": 1490,
1685
+ "kago": 1491,
1686
+ "le": 1492,
1687
+ "li": 1493,
1688
+ "las": 1494,
1689
+ "lên": 1495,
1690
+ "mes": 1496,
1691
+ "mem": 1497,
1692
+ "nash": 1498,
1693
+ "pao": 1499,
1694
+ "port": 1500,
1695
+ "rott": 1501,
1696
+ "sự": 1502,
1697
+ "tổ": 1503,
1698
+ "tiết": 1504,
1699
+ "ulan": 1505,
1700
+ "ve": 1506,
1701
+ "vâ": 1507,
1702
+ "yai": 1508,
1703
+ "đo": 1509,
1704
+ "đồ": 1510,
1705
+ "##ab": 1511,
1706
+ "##a_tr": 1512,
1707
+ "##_ăn": 1513,
1708
+ "##gas": 1514,
1709
+ "##mingham": 1515,
1710
+ "##ssel": 1516,
1711
+ "##um": 1517,
1712
+ "##h1": 1518,
1713
+ "##28": 1519,
1714
+ "##39": 1520,
1715
+ "##kis": 1521,
1716
+ "##la": 1522,
1717
+ "##ộng": 1523,
1718
+ "##pa": 1524,
1719
+ "##fth": 1525,
1720
+ "##dor": 1526,
1721
+ "##dia": 1527,
1722
+ "##18": 1528,
1723
+ "##àu": 1529,
1724
+ "##ze": 1530,
1725
+ "##ầm": 1531,
1726
+ "##ắk": 1532,
1727
+ "##ận": 1533,
1728
+ "##ặng": 1534,
1729
+ "##ngal": 1535,
1730
+ "chữ": 1536,
1731
+ "chuyển": 1537,
1732
+ "chặng": 1538,
1733
+ "ba_mươi": 1539,
1734
+ "thôi": 1540,
1735
+ "##_tám": 1541,
1736
+ "##phis": 1542,
1737
+ "##_phát": 1543,
1738
+ "##ất_phát": 1544,
1739
+ "nhẹ": 1545,
1740
+ "nha_tr": 1546,
1741
+ "##_kiệ": 1547,
1742
+ "##_lực": 1548,
1743
+ "##_lượng": 1549,
1744
+ "hui": 1550,
1745
+ "bur": 1551,
1746
+ "##int": 1552,
1747
+ "##inawa": 1553,
1748
+ "##ami": 1554,
1749
+ "hàng_châu": 1555,
1750
+ "##ạt_đ": 1556,
1751
+ "##airo": 1557,
1752
+ "##_thuận": 1558,
1753
+ "##and": 1559,
1754
+ "##anapolis": 1560,
1755
+ "vien": 1561,
1756
+ "##_đầu": 1562,
1757
+ "##_vậy": 1563,
1758
+ "##orgia": 1564,
1759
+ "##uru": 1565,
1760
+ "##erdam": 1566,
1761
+ "hoạt_đ": 1567,
1762
+ "##enze": 1568,
1763
+ "mười_hai": 1569,
1764
+ "mười_tám": 1570,
1765
+ "số_lượng": 1571,
1766
+ "quảng_bình": 1572,
1767
+ "lufth": 1573,
1768
+ "##coma": 1574,
1769
+ "santor": 1575,
1770
+ "##omet": 1576,
1771
+ "##verpool": 1577,
1772
+ "##sina": 1578,
1773
+ "##ima": 1579,
1774
+ "okinawa": 1580,
1775
+ "tampa": 1581,
1776
+ "bern": 1582,
1777
+ "delta": 1583,
1778
+ "tacoma": 1584,
1779
+ "turkis": 1585,
1780
+ "##_địa": 1586,
1781
+ "bắt_đầu": 1587,
1782
+ "saint": 1588,
1783
+ "bengal": 1589,
1784
+ "main": 1590,
1785
+ "##ansa": 1591,
1786
+ "825": 1592,
1787
+ "miami": 1593,
1788
+ "xuất_phát": 1594,
1789
+ "##shima": 1595,
1790
+ "##ản_lực": 1596,
1791
+ "##ville": 1597,
1792
+ "houston": 1598,
1793
+ "indianapolis": 1599,
1794
+ "bất_cứ": 1600,
1795
+ "birmingham": 1601,
1796
+ "d9s": 1602,
1797
+ "dili": 1603,
1798
+ "firenze": 1604,
1799
+ "georgia": 1605,
1800
+ "kiev": 1606,
1801
+ "kilomet": 1607,
1802
+ "kyoto": 1608,
1803
+ "kagoshima": 1609,
1804
+ "liverpool": 1610,
1805
+ "messina": 1611,
1806
+ "memphis": 1612,
1807
+ "nashville": 1613,
1808
+ "paolo": 1614,
1809
+ "rotterdam": 1615,
1810
+ "tổng": 1616,
1811
+ "tiết_kiệ": 1617,
1812
+ "vegas": 1618,
1813
+ "vâng": 1619,
1814
+ "đồ_ăn": 1620,
1815
+ "##sseldor": 1621,
1816
+ "nha_trang": 1622,
1817
+ "vienna": 1623,
1818
+ "hoạt_động": 1624,
1819
+ "lufthansa": 1625,
1820
+ "santorini": 1626,
1821
+ "turkish": 1627,
1822
+ "bengaluru": 1628,
1823
+ "tiết_kiệm": 1629,
1824
+ "##sseldorf": 1630,
1825
+ "38": 1631,
1826
+ "55": 1632,
1827
+ "74": 1633,
1828
+ "80": 1634,
1829
+ "81": 1635,
1830
+ "cơ": 1636,
1831
+ "cas": 1637,
1832
+ "cont": 1638,
1833
+ "cairo": 1639,
1834
+ "det": 1640,
1835
+ "fn": 1641,
1836
+ "fuk": 1642,
1837
+ "f28": 1643,
1838
+ "gev": 1644,
1839
+ "jai": 1645,
1840
+ "jer": 1646,
1841
+ "kr": 1647,
1842
+ "kế": 1648,
1843
+ "kol": 1649,
1844
+ "lau": 1650,
1845
+ "lục": 1651,
1846
+ "mala": 1652,
1847
+ "mid": 1653,
1848
+ "nap": 1654,
1849
+ "napol": 1655,
1850
+ "nott": 1656,
1851
+ "oh": 1657,
1852
+ "ra": 1658,
1853
+ "re": 1659,
1854
+ "si": 1660,
1855
+ "sic": 1661,
1856
+ "sap": 1662,
1857
+ "sof": 1663,
1858
+ "tứ": 1664,
1859
+ "tw": 1665,
1860
+ "tỉ": 1666,
1861
+ "tàu": 1667,
1862
+ "tầm": 1668,
1863
+ "ula": 1669,
1864
+ "vẫ": 1670,
1865
+ "vậ": 1671,
1866
+ "việt": 1672,
1867
+ "ver": 1673,
1868
+ "xác": 1674,
1869
+ "zad": 1675,
1870
+ "ý_ng": 1676,
1871
+ "độ": 1677,
1872
+ "đối": 1678,
1873
+ "đại": 1679,
1874
+ "đảo": 1680,
1875
+ "đúng": 1681,
1876
+ "##aatar": 1682,
1877
+ "##mai": 1683,
1878
+ "##ạng": 1684,
1879
+ "##ns": 1685,
1880
+ "##n_đêm": 1686,
1881
+ "##ơ_ph": 1687,
1882
+ "##ee": 1688,
1883
+ "##eb": 1689,
1884
+ "##ec": 1690,
1885
+ "##embo": 1691,
1886
+ "##baatar": 1692,
1887
+ "##uok": 1693,
1888
+ "##tal": 1694,
1889
+ "##h4": 1695,
1890
+ "##hĩa": 1696,
1891
+ "##iể": 1697,
1892
+ "##iến": 1698,
1893
+ "##ce": 1699,
1894
+ "##cca": 1700,
1895
+ "##20": 1701,
1896
+ "##yl": 1702,
1897
+ "##kee": 1703,
1898
+ "##91": 1704,
1899
+ "##ly": 1705,
1900
+ "##ế_nào": 1706,
1901
+ "##por": 1707,
1902
+ "##vania": 1708,
1903
+ "##dap": 1709,
1904
+ "##65": 1710,
1905
+ "##10": 1711,
1906
+ "##ường": 1712,
1907
+ "##wau": 1713,
1908
+ "##zawa": 1714,
1909
+ "##xembo": 1715,
1910
+ "##ắng": 1716,
1911
+ "chọn": 1717,
1912
+ "bar": 1718,
1913
+ "ban_đêm": 1719,
1914
+ "thess": 1720,
1915
+ "thường": 1721,
1916
+ "##_hải": 1722,
1917
+ "có_m": 1723,
1918
+ "nhưỡ": 1724,
1919
+ "##_theo": 1725,
1920
+ "##_thế_nào": 1726,
1921
+ "##_kia": 1727,
1922
+ "phan": 1728,
1923
+ "ngày_kia": 1729,
1924
+ "budap": 1730,
1925
+ "##inen": 1731,
1926
+ "##_hoạch": 1732,
1927
+ "##art": 1733,
1928
+ "##ardia": 1734,
1929
+ "##_gắng": 1735,
1930
+ "##anbaatar": 1736,
1931
+ "##onik": 1737,
1932
+ "##chen": 1738,
1933
+ "##_sở": 1739,
1934
+ "##_viết": 1740,
1935
+ "##_với": 1741,
1936
+ "##ord": 1742,
1937
+ "##alonik": 1743,
1938
+ "logan": 1744,
1939
+ "khoang": 1745,
1940
+ "queb": 1746,
1941
+ "1291": 1747,
1942
+ "##ena": 1748,
1943
+ "##ence": 1749,
1944
+ "200": 1750,
1945
+ "##illy": 1751,
1946
+ "ăn_không": 1752,
1947
+ "mười_bảy": 1753,
1948
+ "luxembo": 1754,
1949
+ "##ng_cơ_ph": 1755,
1950
+ "##apan": 1756,
1951
+ "neo": 1757,
1952
+ "##lanca": 1758,
1953
+ "##neapolis": 1759,
1954
+ "##akow": 1760,
1955
+ "dusseldorf": 1761,
1956
+ "270": 1762,
1957
+ "##n_chuyển": 1763,
1958
+ "ap80": 1764,
1959
+ "##ượt": 1765,
1960
+ "##sanne": 1766,
1961
+ "##lando": 1767,
1962
+ "##ussel": 1768,
1963
+ "atr": 1769,
1964
+ "thượng_hải": 1770,
1965
+ "milwau": 1771,
1966
+ "như_vậy": 1772,
1967
+ "như_thế_nào": 1773,
1968
+ "florence": 1774,
1969
+ "hành_trình": 1775,
1970
+ "inche": 1776,
1971
+ "chiangmai": 1777,
1972
+ "northwest": 1778,
1973
+ "penns": 1779,
1974
+ "praha": 1780,
1975
+ "##roit": 1781,
1976
+ "7h18": 1782,
1977
+ "brussel": 1783,
1978
+ "296": 1784,
1979
+ "orlando": 1785,
1980
+ "##sey": 1786,
1981
+ "munich": 1787,
1982
+ "munchen": 1788,
1983
+ "canada": 1789,
1984
+ "minneapolis": 1790,
1985
+ "tiếp_theo": 1791,
1986
+ "venice": 1792,
1987
+ "warsaw": 1793,
1988
+ "cincinati": 1794,
1989
+ "cố_gắng": 1795,
1990
+ "##ablanca": 1796,
1991
+ "chữ_viết": 1797,
1992
+ "chuyển_tiếp": 1798,
1993
+ "tổng_số": 1799,
1994
+ "813": 1800,
1995
+ "cơ_sở": 1801,
1996
+ "casablanca": 1802,
1997
+ "continen": 1803,
1998
+ "detroit": 1804,
1999
+ "fukuok": 1805,
2000
+ "gevena": 1806,
2001
+ "jaipur": 1807,
2002
+ "jersey": 1808,
2003
+ "krakow": 1809,
2004
+ "kế_hoạch": 1810,
2005
+ "lausanne": 1811,
2006
+ "lục_địa": 1812,
2007
+ "malacca": 1813,
2008
+ "naples": 1814,
2009
+ "napoli": 1815,
2010
+ "nottingham": 1816,
2011
+ "ohio": 1817,
2012
+ "reap": 1818,
2013
+ "siem": 1819,
2014
+ "sicilly": 1820,
2015
+ "sappor": 1821,
2016
+ "sofia": 1822,
2017
+ "twa": 1823,
2018
+ "tỉnh": 1824,
2019
+ "ulaanbaatar": 1825,
2020
+ "vẫn": 1826,
2021
+ "vận_chuyển": 1827,
2022
+ "việt_nam": 1828,
2023
+ "verona": 1829,
2024
+ "xác_định": 1830,
2025
+ "zadar": 1831,
2026
+ "ý_nghĩa": 1832,
2027
+ "động_cơ_ph": 1833,
2028
+ "đối_với": 1834,
2029
+ "##iểu": 1835,
2030
+ "##ylvania": 1836,
2031
+ "thessalonik": 1837,
2032
+ "có_mặt": 1838,
2033
+ "nhưỡng": 1839,
2034
+ "budapest": 1840,
2035
+ "quebec": 1841,
2036
+ "luxembourg": 1842,
2037
+ "milwaukee": 1843,
2038
+ "incheon": 1844,
2039
+ "pennsylvania": 1845,
2040
+ "brussels": 1846,
2041
+ "continental": 1847,
2042
+ "fukuoka": 1848,
2043
+ "sapporo": 1849,
2044
+ "động_cơ_phản_lực": 1850,
2045
+ "thessaloniki": 1851,
2046
+ "05": 1852,
2047
+ "33": 1853,
2048
+ "37": 1854,
2049
+ "40": 1855,
2050
+ "41": 1856,
2051
+ "4h30": 1857,
2052
+ "50": 1858,
2053
+ "5h30": 1859,
2054
+ "6h30": 1860,
2055
+ "76": 1861,
2056
+ "757": 1862,
2057
+ "777": 1863,
2058
+ "ac": 1864,
2059
+ "bh": 1865,
2060
+ "bord": 1866,
2061
+ "cp": 1867,
2062
+ "cánh": 1868,
2063
+ "cle": 1869,
2064
+ "de": 1870,
2065
+ "df": 1871,
2066
+ "din": 1872,
2067
+ "dùng": 1873,
2068
+ "dương": 1874,
2069
+ "dart": 1875,
2070
+ "ex": 1876,
2071
+ "go": 1877,
2072
+ "gau": 1878,
2073
+ "gen": 1879,
2074
+ "hà": 1880,
2075
+ "hội": 1881,
2076
+ "hùng": 1882,
2077
+ "hak": 1883,
2078
+ "jos": 1884,
2079
+ "kè": 1885,
2080
+ "kia": 1886,
2081
+ "lh": 1887,
2082
+ "lớ": 1888,
2083
+ "lâm": 1889,
2084
+ "liệu": 1890,
2085
+ "lượt": 1891,
2086
+ "mô": 1892,
2087
+ "mà_": 1893,
2088
+ "mac": 1894,
2089
+ "mich": 1895,
2090
+ "no": 1896,
2091
+ "nw": 1897,
2092
+ "nữa": 1898,
2093
+ "nên": 1899,
2094
+ "nơi": 1900,
2095
+ "per": 1901,
2096
+ "ri": 1902,
2097
+ "rất": 1903,
2098
+ "rago": 1904,
2099
+ "sam": 1905,
2100
+ "south": 1906,
2101
+ "te": 1907,
2102
+ "toul": 1908,
2103
+ "tích": 1909,
2104
+ "tiến": 1910,
2105
+ "vk": 1911,
2106
+ "vô": 1912,
2107
+ "vì": 1913,
2108
+ "việ": 1914,
2109
+ "viên": 1915,
2110
+ "vinc": 1916,
2111
+ "west": 1917,
2112
+ "đá": 1918,
2113
+ "đã": 1919,
2114
+ "đôn": 1920,
2115
+ "đắk": 1921,
2116
+ "ổn": 1922,
2117
+ "##od": 1923,
2118
+ "##_an": 1924,
2119
+ "##_qu": 1925,
2120
+ "##mat": 1926,
2121
+ "##nes": 1927,
2122
+ "##nati": 1928,
2123
+ "##sk": 1929,
2124
+ "##smouth": 1930,
2125
+ "##ez": 1931,
2126
+ "##eau": 1932,
2127
+ "##ban": 1933,
2128
+ "##uân": 1934,
2129
+ "##ái": 1935,
2130
+ "##hm": 1936,
2131
+ "##h5": 1937,
2132
+ "##hran": 1938,
2133
+ "##h00": 1939,
2134
+ "##h24": 1940,
2135
+ "##ie": 1941,
2136
+ "##ian": 1942,
2137
+ "##ile": 1943,
2138
+ "##iel": 1944,
2139
+ "##igan": 1945,
2140
+ "##ress": 1946,
2141
+ "##còn": 1947,
2142
+ "##cel": 1948,
2143
+ "##ón": 1949,
2144
+ "##22": 1950,
2145
+ "##52": 1951,
2146
+ "##55": 1952,
2147
+ "##yad": 1953,
2148
+ "##york": 1954,
2149
+ "##38": 1955,
2150
+ "##ị_tr": 1956,
2151
+ "##09": 1957,
2152
+ "##lle": 1958,
2153
+ "##lles": 1959,
2154
+ "##press": 1960,
2155
+ "##ố_đ": 1961,
2156
+ "##vel": 1962,
2157
+ "##vada": 1963,
2158
+ "##fiel": 1964,
2159
+ "##73": 1965,
2160
+ "##ìn": 1966,
2161
+ "##84": 1967,
2162
+ "##ọ_x": 1968,
2163
+ "##úng_t": 1969,
2164
+ "##ữ_l": 1970,
2165
+ "chúng_t": 1971,
2166
+ "bato": 1972,
2167
+ "##uya": 1973,
2168
+ "thích": 1974,
2169
+ "theo": 1975,
2170
+ "thọ_x": 1976,
2171
+ "##_tả": 1977,
2172
+ "##_có": 1978,
2173
+ "khuya": 1979,
2174
+ "##_hàng": 1980,
2175
+ "##_hiểu": 1981,
2176
+ "##_thiết": 1982,
2177
+ "##_một": 1983,
2178
+ "##_biệt": 1984,
2179
+ "##_bat": 1985,
2180
+ "phố_đ": 1986,
2181
+ "trapan": 1987,
2182
+ "##_lịch": 1988,
2183
+ "##_lắk": 1989,
2184
+ "hãn": 1990,
2185
+ "tháng_một": 1991,
2186
+ "##ardo": 1992,
2187
+ "##arie": 1993,
2188
+ "##onardo": 1994,
2189
+ "vii": 1995,
2190
+ "đường_hàng": 1996,
2191
+ "##_đồng": 1997,
2192
+ "##_đón": 1998,
2193
+ "dc10": 1999,
2194
+ "đồng_n": 2000,
2195
+ "tìm_hiểu": 2001,
2196
+ "##alia": 2002,
2197
+ "##alile": 2003,
2198
+ "lomb": 2004,
2199
+ "nghìn": 2005,
2200
+ "quanh": 2006,
2201
+ "1039": 2007,
2202
+ "1222": 2008,
2203
+ "lúc_nào": 2009,
2204
+ "hoàng": 2010,
2205
+ "##_dữ_l": 2011,
2206
+ "201": 2012,
2207
+ "##ouse": 2013,
2208
+ "##ata": 2014,
2209
+ "##ate": 2015,
2210
+ "##adian": 2016,
2211
+ "nevada": 2017,
2212
+ "newyork": 2018,
2213
+ "dulles": 2019,
2214
+ "du_lịch": 2020,
2215
+ "khác_biệt": 2021,
2216
+ "1500": 2022,
2217
+ "france": 2023,
2218
+ "limo": 2024,
2219
+ "1765": 2025,
2220
+ "17h00": 2026,
2221
+ "21h": 2027,
2222
+ "217": 2028,
2223
+ "##italia": 2029,
2224
+ "263": 2030,
2225
+ "269": 2031,
2226
+ "ghế_ng": 2032,
2227
+ "berlin": 2033,
2228
+ "##avel": 2034,
2229
+ "charles": 2035,
2230
+ "73s": 2036,
2231
+ "733": 2037,
2232
+ "colum": 2038,
2233
+ "##eva": 2039,
2234
+ "##thenburg": 2040,
2235
+ "7h20": 2041,
2236
+ "727": 2042,
2237
+ "bras": 2043,
2238
+ "bình_định": 2044,
2239
+ "bình_thuận": 2045,
2240
+ "đưa_đón": 2046,
2241
+ "343": 2047,
2242
+ "max": 2048,
2243
+ "ninh_thuận": 2049,
2244
+ "ord": 2050,
2245
+ "canadian": 2051,
2246
+ "ea_có": 2052,
2247
+ "venez": 2053,
2248
+ "warszawa": 2054,
2249
+ "##quarie": 2055,
2250
+ "cincinnati": 2056,
2251
+ "almat": 2057,
2252
+ "alitalia": 2058,
2253
+ "leonardo": 2059,
2254
+ "portsmouth": 2060,
2255
+ "ulan_bat": 2061,
2256
+ "##h15": 2062,
2257
+ "burban": 2063,
2258
+ "747": 2064,
2259
+ "koln": 2065,
2260
+ "midwest": 2066,
2261
+ "##h45": 2067,
2262
+ "barcel": 2068,
2263
+ "thường_châu": 2069,
2264
+ "phan_thiết": 2070,
2265
+ "cơ_sở_dữ_l": 2071,
2266
+ "3724": 2072,
2267
+ "417": 2073,
2268
+ "767": 2074,
2269
+ "bordeau": 2075,
2270
+ "cánh_qu": 2076,
2271
+ "clevel": 2077,
2272
+ "dfw": 2078,
2273
+ "dartmouth": 2079,
2274
+ "express": 2080,
2275
+ "gothenburg": 2081,
2276
+ "gaulle": 2082,
2277
+ "geneva": 2083,
2278
+ "hàm": 2084,
2279
+ "hội_an": 2085,
2280
+ "hakod": 2086,
2281
+ "jose": 2087,
2282
+ "kèm": 2088,
2283
+ "lớn": 2089,
2284
+ "lâm_đồng": 2090,
2285
+ "mô_tả": 2091,
2286
+ "mà_còn": 2092,
2287
+ "macquarie": 2093,
2288
+ "michigan": 2094,
2289
+ "riyad": 2095,
2290
+ "ragoon": 2096,
2291
+ "tehran": 2097,
2292
+ "toulouse": 2098,
2293
+ "tiếng": 2099,
2294
+ "vkg": 2100,
2295
+ "việc": 2101,
2296
+ "vinci": 2102,
2297
+ "đáp": 2103,
2298
+ "đắk_lắk": 2104,
2299
+ "##field": 2105,
2300
+ "chúng_tôi": 2106,
2301
+ "thọ_xuân": 2107,
2302
+ "phố_đông": 2108,
2303
+ "trapani": 2109,
2304
+ "đồng_nai": 2110,
2305
+ "lombardia": 2111,
2306
+ "ghế_ngồi": 2112,
2307
+ "columbus": 2113,
2308
+ "venezia": 2114,
2309
+ "almaty": 2115,
2310
+ "ulan_bator": 2116,
2311
+ "burbank": 2117,
2312
+ "barcelona": 2118,
2313
+ "cơ_sở_dữ_liệu": 2119,
2314
+ "bordeaux": 2120,
2315
+ "cánh_quạt": 2121,
2316
+ "cleveland": 2122,
2317
+ "hakodate": 2123,
2318
+ "riyadh": 2124,
2319
+ "1h30": 2125,
2320
+ "2h": 2126,
2321
+ "257": 2127,
2322
+ "32": 2128,
2323
+ "324": 2129,
2324
+ "3h24": 2130,
2325
+ "352": 2131,
2326
+ "49": 2132,
2327
+ "44": 2133,
2328
+ "430": 2134,
2329
+ "4h15": 2135,
2330
+ "4h45": 2136,
2331
+ "539": 2137,
2332
+ "5h15": 2138,
2333
+ "5h45": 2139,
2334
+ "6h": 2140,
2335
+ "63": 2141,
2336
+ "60": 2142,
2337
+ "639": 2143,
2338
+ "665": 2144,
2339
+ "673": 2145,
2340
+ "79": 2146,
2341
+ "77": 2147,
2342
+ "8h": 2148,
2343
+ "84": 2149,
2344
+ "8h1": 2150,
2345
+ "852": 2151,
2346
+ "838": 2152,
2347
+ "98": 2153,
2348
+ "928": 2154,
2349
+ "918": 2155,
2350
+ "aa": 2156,
2351
+ "alas": 2157,
2352
+ "ahm": 2158,
2353
+ "bn": 2159,
2354
+ "bs": 2160,
2355
+ "bl": 2161,
2356
+ "bw": 2162,
2357
+ "biên": 2163,
2358
+ "bên": 2164,
2359
+ "bna": 2165,
2360
+ "bán": 2166,
2361
+ "b77": 2167,
2362
+ "cạ": 2168,
2363
+ "ce": 2169,
2364
+ "cv": 2170,
2365
+ "cq": 2171,
2366
+ "cư": 2172,
2367
+ "cẩ": 2173,
2368
+ "cou": 2174,
2369
+ "câu": 2175,
2370
+ "dự": 2176,
2371
+ "dü": 2177,
2372
+ "dành": 2178,
2373
+ "don": 2179,
2374
+ "dài": 2180,
2375
+ "dạng": 2181,
2376
+ "d10": 2182,
2377
+ "em": 2183,
2378
+ "ek": 2184,
2379
+ "edi": 2185,
2380
+ "fi": 2186,
2381
+ "fr": 2187,
2382
+ "fin": 2188,
2383
+ "flot": 2189,
2384
+ "gu": 2190,
2385
+ "gol": 2191,
2386
+ "gore": 2192,
2387
+ "gọn": 2193,
2388
+ "gán": 2194,
2389
+ "galile": 2195,
2390
+ "hy": 2196,
2391
+ "họ": 2197,
2392
+ "hè": 2198,
2393
+ "hà_": 2199,
2394
+ "hỏi": 2200,
2395
+ "hằng": 2201,
2396
+ "haw": 2202,
2397
+ "hart": 2203,
2398
+ "il": 2204,
2399
+ "ipo": 2205,
2400
+ "jf": 2206,
2401
+ "jal": 2207,
2402
+ "ko": 2208,
2403
+ "kể": 2209,
2404
+ "ký": 2210,
2405
+ "kal": 2211,
2406
+ "kịch": 2212,
2407
+ "kat": 2213,
2408
+ "kem": 2214,
2409
+ "kans": 2215,
2410
+ "ls": 2216,
2411
+ "lệ": 2217,
2412
+ "lặ": 2218,
2413
+ "long": 2219,
2414
+ "lan": 2220,
2415
+ "lâu": 2221,
2416
+ "lượng": 2222,
2417
+ "lấy": 2223,
2418
+ "lạng": 2224,
2419
+ "mc": 2225,
2420
+ "mó": 2226,
2421
+ "mù": 2227,
2422
+ "mú": 2228,
2423
+ "mas": 2229,
2424
+ "mash": 2230,
2425
+ "mang": 2231,
2426
+ "mia": 2232,
2427
+ "mặt": 2233,
2428
+ "mình": 2234,
2429
+ "mat": 2235,
2430
+ "mco": 2236,
2431
+ "med": 2237,
2432
+ "nẵng": 2238,
2433
+ "nan": 2239,
2434
+ "nairo": 2240,
2435
+ "ol": 2241,
2436
+ "pe": 2242,
2437
+ "pat": 2243,
2438
+ "pad": 2244,
2439
+ "qr": 2245,
2440
+ "ro": 2246,
2441
+ "rang": 2247,
2442
+ "rim": 2248,
2443
+ "rằng": 2249,
2444
+ "sb": 2250,
2445
+ "su": 2251,
2446
+ "sh": 2252,
2447
+ "sl": 2253,
2448
+ "sf": 2254,
2449
+ "sd": 2255,
2450
+ "sw": 2256,
2451
+ "sin": 2257,
2452
+ "sầm": 2258,
2453
+ "tb": 2259,
2454
+ "td": 2260,
2455
+ "tuyến": 2261,
2456
+ "ten": 2262,
2457
+ "tos": 2263,
2458
+ "tott": 2264,
2459
+ "tốn": 2265,
2460
+ "tow": 2266,
2461
+ "uta": 2267,
2462
+ "vác": 2268,
2463
+ "var": 2269,
2464
+ "vca": 2270,
2465
+ "vượt": 2271,
2466
+ "vị_tr": 2272,
2467
+ "wel": 2273,
2468
+ "xuố": 2274,
2469
+ "yy": 2275,
2470
+ "yang": 2276,
2471
+ "yok": 2277,
2472
+ "áp": 2278,
2473
+ "ôn": 2279,
2474
+ "ô_t": 2280,
2475
+ "ôn_ch": 2281,
2476
+ "đề": 2282,
2477
+ "đông": 2283,
2478
+ "đổi": 2284,
2479
+ "đây": 2285,
2480
+ "đấy": 2286,
2481
+ "##a_th": 2287,
2482
+ "##ast": 2288,
2483
+ "##ach": 2289,
2484
+ "##op": 2290,
2485
+ "##ov": 2291,
2486
+ "##o_g": 2292,
2487
+ "##oan": 2293,
2488
+ "##ois": 2294,
2489
+ "##ombo": 2295,
2490
+ "##ota": 2296,
2491
+ "##oham": 2297,
2492
+ "##_w": 2298,
2493
+ "##ge": 2299,
2494
+ "##gdo": 2300,
2495
+ "##gra": 2301,
2496
+ "##me": 2302,
2497
+ "##m_ph": 2303,
2498
+ "##m_giá": 2304,
2499
+ "##nai": 2305,
2500
+ "##nik": 2306,
2501
+ "##now": 2307,
2502
+ "##so": 2308,
2503
+ "##suy": 2309,
2504
+ "##sach": 2310,
2505
+ "##ef": 2311,
2506
+ "##eon": 2312,
2507
+ "##bh": 2313,
2508
+ "##bir": 2314,
2509
+ "##bình": 2315,
2510
+ "##brid": 2316,
2511
+ "##ua": 2317,
2512
+ "##ut": 2318,
2513
+ "##uế": 2319,
2514
+ "##ugia": 2320,
2515
+ "##áo": 2321,
2516
+ "##ts": 2322,
2517
+ "##ti": 2323,
2518
+ "##tiên": 2324,
2519
+ "##hay": 2325,
2520
+ "##h20": 2326,
2521
+ "##ảm_giá": 2327,
2522
+ "##ij": 2328,
2523
+ "##iểm": 2329,
2524
+ "##i_giờ": 2330,
2525
+ "##ibir": 2331,
2526
+ "##ric": 2332,
2527
+ "##ravel": 2333,
2528
+ "##cy": 2334,
2529
+ "##cer": 2335,
2530
+ "##cana": 2336,
2531
+ "##25": 2337,
2532
+ "##53": 2338,
2533
+ "##50": 2339,
2534
+ "##59": 2340,
2535
+ "##58": 2341,
2536
+ "##yid": 2342,
2537
+ "##ãnh": 2343,
2538
+ "##34": 2344,
2539
+ "##05": 2345,
2540
+ "##07": 2346,
2541
+ "##ka": 2347,
2542
+ "##kata": 2348,
2543
+ "##let": 2349,
2544
+ "##lia": 2350,
2545
+ "##lav": 2351,
2546
+ "##ộc": 2352,
2547
+ "##pton": 2353,
2548
+ "##papan": 2354,
2549
+ "##pyid": 2355,
2550
+ "##var": 2356,
2551
+ "##vos": 2357,
2552
+ "##vnik": 2358,
2553
+ "##ấn": 2359,
2554
+ "##fol": 2360,
2555
+ "##fino": 2361,
2556
+ "##ford": 2362,
2557
+ "##ừa_th": 2363,
2558
+ "##dé": 2364,
2559
+ "##dif": 2365,
2560
+ "##68": 2366,
2561
+ "##15": 2367,
2562
+ "##11": 2368,
2563
+ "##ệt": 2369,
2564
+ "##zak": 2370,
2565
+ "##zio": 2371,
2566
+ "##ília": 2372,
2567
+ "##83": 2373,
2568
+ "##ọng": 2374,
2569
+ "##édé": 2375,
2570
+ "##ng_k": 2376,
2571
+ "##ng_chuyển": 2377,
2572
+ "chin": 2378,
2573
+ "chắc": 2379,
2574
+ "chuyên": 2380,
2575
+ "bagdo": 2381,
2576
+ "thử": 2382,
2577
+ "thiết": 2383,
2578
+ "thay": 2384,
2579
+ "thiên": 2385,
2580
+ "thăm": 2386,
2581
+ "thái": 2387,
2582
+ "thị_tr": 2388,
2583
+ "thiểm": 2389,
2584
+ "thừa_th": 2390,
2585
+ "##à_bình": 2391,
2586
+ "giống": 2392,
2587
+ "giảm_giá": 2393,
2588
+ "##_cáo": 2394,
2589
+ "##_phương": 2395,
2590
+ "khời": 2396,
2591
+ "khách": 2397,
2592
+ "##_hạng": 2398,
2593
+ "##_havel": 2399,
2594
+ "##_huế": 2400,
2595
+ "có_hạng": 2401,
2596
+ "nhà": 2402,
2597
+ "nhắc": 2403,
2598
+ "##_thành": 2404,
2599
+ "##_thiểu": 2405,
2600
+ "##iếm": 2406,
2601
+ "##_kiến": 2407,
2602
+ "##_kiếm": 2408,
2603
+ "##_b���": 2409,
2604
+ "##_bản": 2410,
2605
+ "##_chắn": 2411,
2606
+ "##_chop": 2412,
2607
+ "vài": 2413,
2608
+ "phản_lực": 2414,
2609
+ "pheon": 2415,
2610
+ "##_liên": 2416,
2611
+ "##_lỗi": 2417,
2612
+ "##_lake": 2418,
2613
+ "##_loan": 2419,
2614
+ "##_lãnh": 2420,
2615
+ "buch": 2421,
2616
+ "##etol": 2422,
2617
+ "##etts": 2423,
2618
+ "nhất_là": 2424,
2619
+ "hồng_k": 2425,
2620
+ "giờ_địa": 2426,
2621
+ "##in_w": 2427,
2622
+ "##ama": 2428,
2623
+ "##amata": 2429,
2624
+ "##ampton": 2430,
2625
+ "##aii": 2431,
2626
+ "đi_lại": 2432,
2627
+ "##arest": 2433,
2628
+ "##sted": 2434,
2629
+ "##irat": 2435,
2630
+ "##irut": 2436,
2631
+ "bao_giờ": 2437,
2632
+ "##anas": 2438,
2633
+ "##ansted": 2439,
2634
+ "vietravel": 2440,
2635
+ "##ng_tự": 2441,
2636
+ "##ng_tàu": 2442,
2637
+ "##ching": 2443,
2638
+ "tối_đ": 2444,
2639
+ "tối_thiểu": 2445,
2640
+ "##_sao": 2446,
2641
+ "##linois": 2447,
2642
+ "cambrid": 2448,
2643
+ "##_riêng": 2449,
2644
+ "##iên_huế": 2450,
2645
+ "airliness": 2451,
2646
+ "##_đất": 2452,
2647
+ "##_vì": 2453,
2648
+ "##_vọng": 2454,
2649
+ "tuyệt": 2455,
2650
+ "tìm_kiếm": 2456,
2651
+ "washingti": 2457,
2652
+ "##uis": 2458,
2653
+ "louis": 2459,
2654
+ "##leans": 2460,
2655
+ "##ikpapan": 2461,
2656
+ "100": 2462,
2657
+ "106": 2463,
2658
+ "10h30": 2464,
2659
+ "10h1": 2465,
2660
+ "10h24": 2466,
2661
+ "1055": 2467,
2662
+ "10h20": 2468,
2663
+ "1059": 2469,
2664
+ "1083": 2470,
2665
+ "12h": 2471,
2666
+ "121": 2472,
2667
+ "12h30": 2473,
2668
+ "1209": 2474,
2669
+ "1207": 2475,
2670
+ "hoà_bình": 2476,
2671
+ "xin_lỗi": 2477,
2672
+ "trung_chuyển": 2478,
2673
+ "##_diện": 2479,
2674
+ "##iss": 2480,
2675
+ "mười_ba": 2481,
2676
+ "##icino": 2482,
2677
+ "quảng_cáo": 2483,
2678
+ "11h30": 2484,
2679
+ "11h5": 2485,
2680
+ "##adford": 2486,
2681
+ "luton": 2487,
2682
+ "luck": 2488,
2683
+ "lucer": 2489,
2684
+ "##ng_cái": 2490,
2685
+ "##aplet": 2491,
2686
+ "bourne": 2492,
2687
+ "kuching": 2493,
2688
+ "##stern": 2494,
2689
+ "pari": 2495,
2690
+ "sevil": 2496,
2691
+ "279": 2497,
2692
+ "thuộc": 2498,
2693
+ "2022": 2499,
2694
+ "ap58": 2500,
2695
+ "ap68": 2501,
2696
+ "##taa": 2502,
2697
+ "trống_không": 2503,
2698
+ "dubro": 2504,
2699
+ "okay": 2505,
2700
+ "3084": 2506,
2701
+ "doh": 2507,
2702
+ "##ng_qu": 2508,
2703
+ "tương_tự": 2509,
2704
+ "210": 2510,
2705
+ "2153": 2511,
2706
+ "lazio": 2512,
2707
+ "##usetts": 2513,
2708
+ "atl": 2514,
2709
+ "cái": 2515,
2710
+ "a330": 2516,
2711
+ "a350": 2517,
2712
+ "làm_sao": 2518,
2713
+ "##enham": 2519,
2714
+ "tam_đảo": 2520,
2715
+ "tên_riêng": 2521,
2716
+ "19h4": 2522,
2717
+ "312": 2523,
2718
+ "315": 2524,
2719
+ "311": 2525,
2720
+ "##nabh": 2526,
2721
+ "coast": 2527,
2722
+ "inc": 2528,
2723
+ "india": 2529,
2724
+ "459": 2530,
2725
+ "734": 2531,
2726
+ "##eda": 2532,
2727
+ "##eds": 2533,
2728
+ "##edab": 2534,
2729
+ "america": 2535,
2730
+ "art": 2536,
2731
+ "colombo": 2537,
2732
+ "đài_loan": 2538,
2733
+ "##emen": 2539,
2734
+ "stansted": 2540,
2735
+ "staplet": 2541,
2736
+ "tây_ninh": 2542,
2737
+ "vantaa": 2543,
2738
+ "##nty": 2544,
2739
+ "##caster": 2545,
2740
+ "1409": 2546,
2741
+ "7h55": 2547,
2742
+ "7h05": 2548,
2743
+ "72s": 2549,
2744
+ "aero": 2550,
2745
+ "be1": 2551,
2746
+ "beij": 2552,
2747
+ "beirut": 2553,
2748
+ "bradford": 2554,
2749
+ "bremen": 2555,
2750
+ "naypyid": 2556,
2751
+ "norfol": 2557,
2752
+ "tpa": 2558,
2753
+ "vũng_tàu": 2559,
2754
+ "##arson": 2560,
2755
+ "##arszawa": 2561,
2756
+ "##arkand": 2562,
2757
+ "139": 2563,
2758
+ "1373": 2564,
2759
+ "1384": 2565,
2760
+ "229": 2566,
2761
+ "2211": 2567,
2762
+ "297": 2568,
2763
+ "345": 2569,
2764
+ "cardif": 2570,
2765
+ "cathay": 2571,
2766
+ "hamad": 2572,
2767
+ "ninh_bình": 2573,
2768
+ "orleans": 2574,
2769
+ "##_nguyên": 2575,
2770
+ "##see": 2576,
2771
+ "##yazak": 2577,
2772
+ "balikpapan": 2578,
2773
+ "portofino": 2579,
2774
+ "salt_lake": 2580,
2775
+ "18h5": 2581,
2776
+ "ea_là": 2582,
2777
+ "eastern": 2583,
2778
+ "haneda": 2584,
2779
+ "miyazak": 2585,
2780
+ "minnes": 2586,
2781
+ "minsk": 2587,
2782
+ "tiếp_tục": 2588,
2783
+ "xung_qu": 2589,
2784
+ "##shpool": 2590,
2785
+ "##torua": 2591,
2786
+ "indiana": 2592,
2787
+ "7375": 2593,
2788
+ "cao_lãnh": 2594,
2789
+ "hay_là": 2595,
2790
+ "lester": 2596,
2791
+ "leeds": 2597,
2792
+ "##umi": 2598,
2793
+ "##umicino": 2599,
2794
+ "burma": 2600,
2795
+ "##andu": 2601,
2796
+ "mainz": 2602,
2797
+ "382": 2603,
2798
+ "746": 2604,
2799
+ "kolkata": 2605,
2800
+ "midway": 2606,
2801
+ "tàu_bay": 2607,
2802
+ "đại_liên": 2608,
2803
+ "đại_diện": 2609,
2804
+ "bari": 2610,
2805
+ "2704": 2611,
2806
+ "3357": 2612,
2807
+ "402": 2613,
2808
+ "405": 2614,
2809
+ "lhr": 2615,
2810
+ "not": 2616,
2811
+ "novos": 2617,
2812
+ "perugia": 2618,
2813
+ "peretol": 2619,
2814
+ "samui": 2620,
2815
+ "samarkand": 2621,
2816
+ "southwest": 2622,
2817
+ "southampton": 2623,
2818
+ "vì_vậy": 2624,
2819
+ "westchester": 2625,
2820
+ "##nessee": 2626,
2821
+ "##hmandu": 2627,
2822
+ "thích_hợp": 2628,
2823
+ "##alilei": 2629,
2824
+ "21h34": 2630,
2825
+ "2170": 2631,
2826
+ "brasov": 2632,
2827
+ "brasília": 2633,
2828
+ "2h25": 2634,
2829
+ "323": 2635,
2830
+ "4977": 2636,
2831
+ "4400": 2637,
2832
+ "632": 2638,
2833
+ "608": 2639,
2834
+ "798": 2640,
2835
+ "771": 2641,
2836
+ "8415": 2642,
2837
+ "8h16": 2643,
2838
+ "alaska": 2644,
2839
+ "ahmedab": 2645,
2840
+ "bwi": 2646,
2841
+ "biên_hoà": 2647,
2842
+ "b777": 2648,
2843
+ "cạnh": 2649,
2844
+ "cebu": 2650,
2845
+ "cvg": 2651,
2846
+ "cước": 2652,
2847
+ "cẩm_ph": 2653,
2848
+ "county": 2654,
2849
+ "dự_kiến": 2655,
2850
+ "düsseldorf": 2656,
2851
+ "doncaster": 2657,
2852
+ "emirat": 2658,
2853
+ "fiumicino": 2659,
2854
+ "frédé": 2660,
2855
+ "finnai": 2661,
2856
+ "guardia": 2662,
2857
+ "gold": 2663,
2858
+ "goreme": 2664,
2859
+ "galileo_g": 2665,
2860
+ "hy_vọng": 2666,
2861
+ "hà_tiên": 2667,
2862
+ "hawaii": 2668,
2863
+ "hartfield": 2669,
2864
+ "illinois": 2670,
2865
+ "ipoh": 2671,
2866
+ "jfk": 2672,
2867
+ "ký_hiệu": 2673,
2868
+ "kalamata": 2674,
2869
+ "kịch_bản": 2675,
2870
+ "kathmandu": 2676,
2871
+ "kemi": 2677,
2872
+ "kansas": 2678,
2873
+ "lệ_giang": 2679,
2874
+ "lặp": 2680,
2875
+ "long_thành": 2681,
2876
+ "lan_châu": 2682,
2877
+ "lạng_sơn": 2683,
2878
+ "mci": 2684,
2879
+ "móng_cái": 2685,
2880
+ "mùa": 2686,
2881
+ "múi_giờ": 2687,
2882
+ "massach": 2688,
2883
+ "mashad": 2689,
2884
+ "mặt_đất": 2690,
2885
+ "matsuy": 2691,
2886
+ "medan": 2692,
2887
+ "nancy": 2693,
2888
+ "nairobi": 2694,
2889
+ "olso": 2695,
2890
+ "pearson": 2696,
2891
+ "patna": 2697,
2892
+ "padang": 2698,
2893
+ "rotorua": 2699,
2894
+ "rimini": 2700,
2895
+ "suvar": 2701,
2896
+ "shef": 2702,
2897
+ "sfo": 2703,
2898
+ "swiss": 2704,
2899
+ "sầm_sơn": 2705,
2900
+ "tbb": 2706,
2901
+ "tdh": 2707,
2902
+ "tennessee": 2708,
2903
+ "toscana": 2709,
2904
+ "tottenham": 2710,
2905
+ "tower": 2711,
2906
+ "utah": 2712,
2907
+ "václav": 2713,
2908
+ "varanas": 2714,
2909
+ "vị_trí": 2715,
2910
+ "welshpool": 2716,
2911
+ "xuống": 2717,
2912
+ "yyz": 2718,
2913
+ "yangon": 2719,
2914
+ "yokoham": 2720,
2915
+ "áp_dụng": 2721,
2916
+ "ô_tô": 2722,
2917
+ "ôn_châu": 2723,
2918
+ "đều": 2724,
2919
+ "##ibirsk": 2725,
2920
+ "##ric_chop": 2726,
2921
+ "china": 2727,
2922
+ "chắc_chắn": 2728,
2923
+ "bagdogra": 2729,
2924
+ "thiết_bị": 2730,
2925
+ "thay_vì": 2731,
2926
+ "thái_nguyên": 2732,
2927
+ "thị_trấn": 2733,
2928
+ "thừa_thiên_huế": 2734,
2929
+ "pheonix": 2735,
2930
+ "bucharest": 2736,
2931
+ "hồng_kiều": 2737,
2932
+ "giờ_địa_phương": 2738,
2933
+ "##in_warszawa": 2739,
2934
+ "tối_đa": 2740,
2935
+ "cambridge": 2741,
2936
+ "washingtion": 2742,
2937
+ "10h17": 2743,
2938
+ "10h20p": 2744,
2939
+ "11h58": 2745,
2940
+ "lucknow": 2746,
2941
+ "lucerne": 2747,
2942
+ "bournemouth": 2748,
2943
+ "sevilla": 2749,
2944
+ "dubrovnik": 2750,
2945
+ "19h40": 2751,
2946
+ "##nabhumi": 2752,
2947
+ "stapleton": 2753,
2948
+ "beijing": 2754,
2949
+ "naypyidaw": 2755,
2950
+ "norfolk": 2756,
2951
+ "137338": 2757,
2952
+ "cardiff": 2758,
2953
+ "18h50": 2759,
2954
+ "miyazaki": 2760,
2955
+ "minnesota": 2761,
2956
+ "xung_quanh": 2762,
2957
+ "novosibirsk": 2763,
2958
+ "peretola": 2764,
2959
+ "ahmedabad": 2765,
2960
+ "cẩm_phả": 2766,
2961
+ "emirates": 2767,
2962
+ "frédéric_chop": 2768,
2963
+ "finnair": 2769,
2964
+ "galileo_galilei": 2770,
2965
+ "massachusetts": 2771,
2966
+ "matsuyama": 2772,
2967
+ "suvarnabhumi": 2773,
2968
+ "sheffield": 2774,
2969
+ "václav_havel": 2775,
2970
+ "varanasi": 2776,
2971
+ "yokohama": 2777,
2972
+ "frédéric_chopin_warszawa": 2778
2973
+ }
2974
+ }
2975
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<pad>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "<mask>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "mask_token": "<mask>",
49
+ "model_max_length": 1000000000000000019884624838656,
50
+ "pad_token": "</s>",
51
+ "sep_token": "</s>",
52
+ "tokenizer_class": "PreTrainedTokenizerFast",
53
+ "unk_token": "<unk>"
54
+ }