minhtriphan committed
Commit 781473a • Parent(s): f573a4c
Upload 6 files
- pytorch_model.pt +3 -0
- special_tokens_map.json +7 -0
- tokenizer.json +0 -0
- tokenizer_config.json +15 -0
- train_v1a_0803_1144_seed_1.log +12 -0
- vocab.txt +0 -0
pytorch_model.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b159554095dc9e751864cd835be9c3fc5b38d4e5a28a8ca02152a64d87f9e978
+size 490551170
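This is a Git LFS pointer file: only the content hash and size (~490 MB) live in the repository, while the actual weights are stored in LFS. A minimal sketch of fetching and inspecting the checkpoint with huggingface_hub; the repo id below is a placeholder, since the repository name is not shown in this commit:

from huggingface_hub import hf_hub_download
import torch

path = hf_hub_download(
    repo_id="minhtriphan/model-repo",  # hypothetical repo id; substitute the real repository
    filename="pytorch_model.pt",
)

# The training log mentions a custom LongBERTConfig, so this is likely a
# state_dict for a custom model class rather than a standard Transformers
# checkpoint; inspect the keys before trying to instantiate anything.
state = torch.load(path, map_location="cpu")
if isinstance(state, dict):
    for name, tensor in list(state.items())[:5]:
        print(name, tuple(tensor.shape))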
special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer_config.json
ADDED
@@ -0,0 +1,15 @@
+{
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
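With tokenizer_class set to BertTokenizer (a lowercasing WordPiece tokenizer capped at model_max_length 512), the uploaded files are directly loadable. A minimal sketch, assuming the files from this commit sit in a local directory; the path is illustrative, echoing the name_or_path='tokenizer' seen in the training log:

from transformers import AutoTokenizer

# Directory containing tokenizer.json, vocab.txt, tokenizer_config.json,
# and special_tokens_map.json from this commit.
tokenizer = AutoTokenizer.from_pretrained("./tokenizer")

enc = tokenizer("Hello world", truncation=True, max_length=512)
print(enc["input_ids"])
print(tokenizer.convert_ids_to_tokens(enc["input_ids"]))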
train_v1a_0803_1144_seed_1.log
ADDED
@@ -0,0 +1,12 @@
+11:44:11 {'seed': 1, 'ver': 'v1a', 'use_log': True, 'use_tqdm': True, 'debug': False, 'tokenizer': BertTokenizerFast(name_or_path='tokenizer', vocab_size=52000, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True), 'config': <custom_config.LongBERTConfig object at 0x7f2b78440110>, 'max_len': 45000, 'train_one_part': False, 'gradient_accumulation_steps': 2, 'apex': True, 'device': device(type='cuda', index=1), 'nepochs': 4, 'batch_size': 2, 'num_workers': 128, 'lr': 2e-05, 'weight_decay': 0.01, 'encoder_lr': 2e-05, 'decoder_lr': 0.001, 'min_lr': 1e-06, 'eps': 1e-06, 'betas': (0.9, 0.999), 'scheduler_type': 'cosine', 'num_cycles': 0.5, 'num_warmup_steps': 0.0, 'train_data_dir': 'data/train', 'valid_data_dir': 'data/valid', 'test_data_dir': '.', 'output_dir': 'model/v1/a'}
+11:44:11 Preparing training materials...
+11:44:11 Preparing the model...
+11:44:13 Preparing the dataloaders...
+05:03:38 Epoch: [1] - Train/Valid Loss: 5.3361/4.9918
+05:03:38 Saving the model to model/v1/a
+22:26:24 Epoch: [2] - Train/Valid Loss: 4.9233/4.8049
+22:26:24 Saving the model to model/v1/a
+15:48:34 Epoch: [3] - Train/Valid Loss: 4.7910/4.7136
+15:48:34 Saving the model to model/v1/a
+09:11:11 Epoch: [4] - Train/Valid Loss: 4.7136/4.6469
+09:11:11 Saving the model to model/v1/a
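The first log line records the full training configuration: 4 epochs, batch size 2 with 2 gradient-accumulation steps, separate encoder/decoder learning rates (2e-5 / 1e-3), AdamW-style settings (weight decay 0.01, betas (0.9, 0.999), eps 1e-6), and a cosine schedule with no warmup. A hedged sketch of how these values could be wired together; the actual training script is not part of this commit, and model.encoder / model.decoder are assumed attribute names:

import torch
from transformers import get_cosine_schedule_with_warmup

def build_optimizer_and_scheduler(model, num_training_steps):
    # Separate learning rates, as logged: 'encoder_lr': 2e-05, 'decoder_lr': 0.001.
    # model.encoder / model.decoder are hypothetical attribute names.
    param_groups = [
        {"params": model.encoder.parameters(), "lr": 2e-5},
        {"params": model.decoder.parameters(), "lr": 1e-3},
    ]
    optimizer = torch.optim.AdamW(
        param_groups,
        betas=(0.9, 0.999),  # 'betas'
        eps=1e-6,            # 'eps'
        weight_decay=0.01,   # 'weight_decay'
    )
    # 'scheduler_type': 'cosine', 'num_warmup_steps': 0.0, 'num_cycles': 0.5.
    # The logged 'min_lr' and 'apex' (mixed precision) settings are not
    # modeled in this sketch.
    scheduler = get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=num_training_steps,
        num_cycles=0.5,
    )
    return optimizer, scheduler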
vocab.txt
ADDED
The diff for this file is too large to render.