daydrill committed
Commit d559279
1 Parent(s): 7e093c0

Training in progress, step 500

.gitignore ADDED
@@ -0,0 +1 @@
+ checkpoint-*/
added_tokens.json ADDED
@@ -0,0 +1,19 @@
+ {
+ "<td-d>": 32504,
+ "<td-du>": 32507,
+ "<td-l>": 32503,
+ "<td-ld>": 32509,
+ "<td-ldu>": 32515,
+ "<td-lu>": 32511,
+ "<td-r>": 32502,
+ "<td-rd>": 32508,
+ "<td-rdu>": 32514,
+ "<td-rl>": 32506,
+ "<td-rld>": 32512,
+ "<td-rldu>": 32516,
+ "<td-rlu>": 32513,
+ "<td-ru>": 32510,
+ "<td-u>": 32505,
+ "<td>": 32501,
+ "<tr>": 32500
+ }
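
These 17 tokens encode table structure (the r/l/d/u suffixes apparently marking directional variants of a cell) and sit directly above the 32,500-entry base vocabulary of monologg/kobigbird-bert-base, which is why config.json below reports "vocab_size": 32517. A minimal sketch of how such tokens are typically registered before fine-tuning; the actual training script is not part of this commit, so this is an assumed reconstruction:

```python
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# Hypothetical reconstruction of the token-registration step. Adding the
# tokens in this order yields consecutive ids starting at 32500, matching
# added_tokens.json above.
table_tokens = [
    "<tr>", "<td>", "<td-r>", "<td-l>", "<td-d>", "<td-u>",
    "<td-rl>", "<td-du>", "<td-rd>", "<td-ld>", "<td-ru>", "<td-lu>",
    "<td-rld>", "<td-rlu>", "<td-rdu>", "<td-ldu>", "<td-rldu>",
]

tokenizer = AutoTokenizer.from_pretrained("monologg/kobigbird-bert-base")
tokenizer.add_special_tokens({"additional_special_tokens": table_tokens})

model = AutoModelForQuestionAnswering.from_pretrained("monologg/kobigbird-bert-base")
model.resize_token_embeddings(len(tokenizer))  # grow embeddings to 32517 rows
assert len(tokenizer) == 32517                 # new ids 32500..32516
```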
config.json ADDED
@@ -0,0 +1,35 @@
+ {
+ "_name_or_path": "monologg/kobigbird-bert-base",
+ "architectures": [
+ "BigBirdForQuestionAnswering"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "attention_type": "block_sparse",
+ "block_size": 64,
+ "bos_token_id": 5,
+ "classifier_dropout": null,
+ "eos_token_id": 6,
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu_new",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 4096,
+ "model_type": "big_bird",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "num_random_blocks": 3,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "rescale_embeddings": false,
+ "sep_token_id": 3,
+ "tokenizer_class": "BertTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.25.1",
+ "type_vocab_size": 2,
+ "use_bias": true,
+ "use_cache": true,
+ "vocab_size": 32517
+ }
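
The config is unchanged from KoBigBird apart from vocab_size: block-sparse attention with 64-token blocks and 3 random blocks per query block, a 4096-token window, and a BERT-base-sized trunk. A sketch that rebuilds the architecture from these values (randomly initialized here; the trained weights live in pytorch_model.bin). Only the size-defining fields are passed, so the token-id and dropout fields fall back to BigBirdConfig defaults rather than the exact values above:

```python
from transformers import BigBirdConfig, BigBirdForQuestionAnswering

# Architecture-defining values copied from config.json above.
config = BigBirdConfig(
    attention_type="block_sparse",
    block_size=64,
    num_random_blocks=3,
    max_position_embeddings=4096,
    hidden_size=768,
    num_attention_heads=12,
    num_hidden_layers=12,
    intermediate_size=3072,
    vocab_size=32517,
)
model = BigBirdForQuestionAnswering(config)
n_params = sum(p.numel() for p in model.parameters())
print(f"{n_params / 1e6:.0f}M parameters")
# Roughly 118M; at 4 bytes per float32 weight this is consistent with the
# ~472 MB pytorch_model.bin committed below.
```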
last-checkpoint/added_tokens.json ADDED
@@ -0,0 +1,19 @@
+ {
+ "<td-d>": 32504,
+ "<td-du>": 32507,
+ "<td-l>": 32503,
+ "<td-ld>": 32509,
+ "<td-ldu>": 32515,
+ "<td-lu>": 32511,
+ "<td-r>": 32502,
+ "<td-rd>": 32508,
+ "<td-rdu>": 32514,
+ "<td-rl>": 32506,
+ "<td-rld>": 32512,
+ "<td-rldu>": 32516,
+ "<td-rlu>": 32513,
+ "<td-ru>": 32510,
+ "<td-u>": 32505,
+ "<td>": 32501,
+ "<tr>": 32500
+ }
last-checkpoint/config.json ADDED
@@ -0,0 +1,35 @@
+ {
+ "_name_or_path": "monologg/kobigbird-bert-base",
+ "architectures": [
+ "BigBirdForQuestionAnswering"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "attention_type": "block_sparse",
+ "block_size": 64,
+ "bos_token_id": 5,
+ "classifier_dropout": null,
+ "eos_token_id": 6,
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu_new",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 4096,
+ "model_type": "big_bird",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "num_random_blocks": 3,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "rescale_embeddings": false,
+ "sep_token_id": 3,
+ "tokenizer_class": "BertTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.25.1",
+ "type_vocab_size": 2,
+ "use_bias": true,
+ "use_cache": true,
+ "vocab_size": 32517
+ }
last-checkpoint/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:20ab356fa18f110500a5c92dc47dd1e4511f4ece5e195bf6f39801135def9e7a
+ size 943333453
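
This file, like the other binaries in this commit, is stored as a Git LFS pointer: three text lines giving the spec version, the SHA-256 oid, and the byte size, with the real blob fetched at checkout by `git lfs pull`. The size alone is informative: at 943 MB the optimizer state is roughly twice the 472 MB model, as expected for AdamW, which keeps two running moments per parameter. A minimal pointer parser, in case you need the oid or size without downloading the blob:

```python
def parse_lfs_pointer(path: str) -> dict[str, str]:
    """Read a Git LFS pointer file into a {key: value} dict."""
    with open(path, encoding="utf-8") as f:
        return dict(line.strip().split(" ", 1) for line in f if line.strip())

ptr = parse_lfs_pointer("last-checkpoint/optimizer.pt")
print(ptr["oid"])   # sha256:20ab356f...
print(ptr["size"])  # 943333453
```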
last-checkpoint/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e1aace261b8e6cad923ee6661d1c7271017bc8d22f018c02ff43351ce6bc4e31
+ size 471708325
last-checkpoint/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7bc19aba973831004a77f20c88b8e3e066a8a55403bc0ad8b246efb14f309ecd
+ size 14567
last-checkpoint/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:586f9b073107d2287ab918b8e3a6c9ec40a7767baa6b65a691f978a7a7ba2ab5
+ size 623
last-checkpoint/special_tokens_map.json ADDED
@@ -0,0 +1,28 @@
+ {
+ "additional_special_tokens": [
+ "<tr>",
+ "<td>",
+ "<td-r>",
+ "<td-l>",
+ "<td-d>",
+ "<td-u>",
+ "<td-rl>",
+ "<td-du>",
+ "<td-rd>",
+ "<td-ld>",
+ "<td-ru>",
+ "<td-lu>",
+ "<td-rld>",
+ "<td-rlu>",
+ "<td-rdu>",
+ "<td-ldu>",
+ "<td-rldu>"
+ ],
+ "bos_token": "<s>",
+ "cls_token": "[CLS]",
+ "eos_token": "</s>",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
last-checkpoint/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json ADDED
@@ -0,0 +1,18 @@
+ {
+ "bos_token": "<s>",
+ "cls_token": "[CLS]",
+ "do_basic_tokenize": true,
+ "do_lower_case": false,
+ "eos_token": "</s>",
+ "mask_token": "[MASK]",
+ "model_max_length": 4096,
+ "name_or_path": "monologg/kobigbird-bert-base",
+ "never_split": null,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "special_tokens_map_file": "/home/ec2-user/.cache/huggingface/hub/models--monologg--kobigbird-bert-base/snapshots/ceacda477e20abef2c929adfa4a07c6f811323be/special_tokens_map.json",
+ "strip_accents": null,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "BertTokenizer",
+ "unk_token": "[UNK]"
+ }
last-checkpoint/trainer_state.json ADDED
@@ -0,0 +1,32 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.008156606851549755,
+ "global_step": 500,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.01,
+ "learning_rate": 2.9918433931484505e-05,
+ "loss": 3.6749,
+ "step": 500
+ },
+ {
+ "epoch": 0.01,
+ "eval_exact_match": 25.56077203964528,
+ "eval_f1": 30.48757258245567,
+ "eval_loss": 2.4391441345214844,
+ "eval_runtime": 1683.1549,
+ "eval_samples_per_second": 6.834,
+ "eval_steps_per_second": 6.834,
+ "step": 500
+ }
+ ],
+ "max_steps": 183900,
+ "num_train_epochs": 3,
+ "total_flos": 1103235145728000.0,
+ "trial_name": null,
+ "trial_params": null
+ }
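
The state file pins the run down precisely: step 500 of 183,900 (3 epochs), training loss 3.67, and a first evaluation at exact match 25.56 / F1 30.49. The logged learning rate of 2.99184e-5 is exactly what linear decay from 3e-5 over 183,900 steps with no warmup gives at step 500 (3e-5 x 183400/183900). A small check that reads the file as committed:

```python
import json

with open("last-checkpoint/trainer_state.json", encoding="utf-8") as f:
    state = json.load(f)

done = state["global_step"] / state["max_steps"]
print(f"step {state['global_step']}/{state['max_steps']} ({done:.2%} of training)")
# -> step 500/183900 (0.27% of training)

eval_log = state["log_history"][-1]  # the evaluation entry at step 500
print(f"EM {eval_log['eval_exact_match']:.2f}, F1 {eval_log['eval_f1']:.2f}")
```

Given the same Trainer setup as the original run, training continues from this point with trainer.train(resume_from_checkpoint="last-checkpoint"), which also restores optimizer.pt, scheduler.pt, and rng_state.pth.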
last-checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ce3df7dacb746d38927d7b4b5c96e23b1180109e65ab3c5eda16560beab01da4
+ size 3439
last-checkpoint/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
nbest_predictions.json ADDED
The diff for this file is too large to render. See raw diff
 
predictions.json ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e1aace261b8e6cad923ee6661d1c7271017bc8d22f018c02ff43351ce6bc4e31
+ size 471708325
special_tokens_map.json ADDED
@@ -0,0 +1,28 @@
+ {
+ "additional_special_tokens": [
+ "<tr>",
+ "<td>",
+ "<td-r>",
+ "<td-l>",
+ "<td-d>",
+ "<td-u>",
+ "<td-rl>",
+ "<td-du>",
+ "<td-rd>",
+ "<td-ld>",
+ "<td-ru>",
+ "<td-lu>",
+ "<td-rld>",
+ "<td-rlu>",
+ "<td-rdu>",
+ "<td-ldu>",
+ "<td-rldu>"
+ ],
+ "bos_token": "<s>",
+ "cls_token": "[CLS]",
+ "eos_token": "</s>",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,18 @@
+ {
+ "bos_token": "<s>",
+ "cls_token": "[CLS]",
+ "do_basic_tokenize": true,
+ "do_lower_case": false,
+ "eos_token": "</s>",
+ "mask_token": "[MASK]",
+ "model_max_length": 4096,
+ "name_or_path": "monologg/kobigbird-bert-base",
+ "never_split": null,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "special_tokens_map_file": "/home/ec2-user/.cache/huggingface/hub/models--monologg--kobigbird-bert-base/snapshots/ceacda477e20abef2c929adfa4a07c6f811323be/special_tokens_map.json",
+ "strip_accents": null,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "BertTokenizer",
+ "unk_token": "[UNK]"
+ }
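
vocab.txt, tokenizer_config.json, special_tokens_map.json, and added_tokens.json are the four files from which the tokenizer is reconstructed, so a single from_pretrained call on the repo restores everything, 4096-token limit and table markers included. A quick sanity check, assuming a local clone of this repo in the working directory:

```python
from transformers import AutoTokenizer

# Loads BertTokenizer, per the tokenizer_class field above.
tokenizer = AutoTokenizer.from_pretrained(".")

print(len(tokenizer))                           # 32517
print(tokenizer.model_max_length)               # 4096
print(tokenizer.convert_tokens_to_ids("<tr>"))  # 32500, per added_tokens.json
print(tokenizer.tokenize("<tr><td-rl>"))        # ['<tr>', '<td-rl>'], kept whole
```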
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ce3df7dacb746d38927d7b4b5c96e23b1180109e65ab3c5eda16560beab01da4
+ size 3439
vocab.txt ADDED
The diff for this file is too large to render. See raw diff