Commit
·
f829f49
1
Parent(s):
e68c39f
models done
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +2 -0
- checkpoint-1000/config.json +25 -0
- checkpoint-1000/optimizer.pt +3 -0
- checkpoint-1000/pytorch_model.bin +3 -0
- checkpoint-1000/rng_state.pth +0 -0
- checkpoint-1000/scheduler.pt +0 -0
- checkpoint-1000/special_tokens_map.json +1 -0
- checkpoint-1000/tokenizer.json +0 -0
- checkpoint-1000/tokenizer_config.json +1 -0
- checkpoint-1000/trainer_state.json +28 -0
- checkpoint-1000/training_args.bin +0 -0
- checkpoint-1000/vocab.txt +0 -0
- checkpoint-1500/config.json +25 -0
- checkpoint-1500/optimizer.pt +3 -0
- checkpoint-1500/pytorch_model.bin +3 -0
- checkpoint-1500/rng_state.pth +0 -0
- checkpoint-1500/scheduler.pt +0 -0
- checkpoint-1500/special_tokens_map.json +1 -0
- checkpoint-1500/tokenizer.json +0 -0
- checkpoint-1500/tokenizer_config.json +1 -0
- checkpoint-1500/trainer_state.json +34 -0
- checkpoint-1500/training_args.bin +0 -0
- checkpoint-1500/vocab.txt +0 -0
- checkpoint-2000/config.json +25 -0
- checkpoint-2000/optimizer.pt +3 -0
- checkpoint-2000/pytorch_model.bin +3 -0
- checkpoint-2000/rng_state.pth +0 -0
- checkpoint-2000/scheduler.pt +0 -0
- checkpoint-2000/special_tokens_map.json +1 -0
- checkpoint-2000/tokenizer.json +0 -0
- checkpoint-2000/tokenizer_config.json +1 -0
- checkpoint-2000/trainer_state.json +40 -0
- checkpoint-2000/training_args.bin +0 -0
- checkpoint-2000/vocab.txt +0 -0
- checkpoint-2500/config.json +25 -0
- checkpoint-2500/optimizer.pt +3 -0
- checkpoint-2500/pytorch_model.bin +3 -0
- checkpoint-2500/rng_state.pth +0 -0
- checkpoint-2500/scheduler.pt +0 -0
- checkpoint-2500/special_tokens_map.json +1 -0
- checkpoint-2500/tokenizer.json +0 -0
- checkpoint-2500/tokenizer_config.json +1 -0
- checkpoint-2500/trainer_state.json +46 -0
- checkpoint-2500/training_args.bin +0 -0
- checkpoint-2500/vocab.txt +0 -0
- checkpoint-3000/config.json +25 -0
- checkpoint-3000/optimizer.pt +3 -0
- checkpoint-3000/pytorch_model.bin +3 -0
- checkpoint-3000/rng_state.pth +0 -0
- checkpoint-3000/scheduler.pt +0 -0
.gitattributes
CHANGED
@@ -25,3 +25,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*/optimizer.pt filter=lfs diff=lfs merge=lfs -text
|
checkpoint-1000/config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "distilbert-base-uncased",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"model_type": "distilbert",
|
14 |
+
"n_heads": 12,
|
15 |
+
"n_layers": 6,
|
16 |
+
"pad_token_id": 0,
|
17 |
+
"problem_type": "single_label_classification",
|
18 |
+
"qa_dropout": 0.1,
|
19 |
+
"seq_classif_dropout": 0.2,
|
20 |
+
"sinusoidal_pos_embds": false,
|
21 |
+
"tie_weights_": true,
|
22 |
+
"torch_dtype": "float32",
|
23 |
+
"transformers_version": "4.13.0.dev0",
|
24 |
+
"vocab_size": 30522
|
25 |
+
}
|
checkpoint-1000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9a6ed83b3ab9e114c56f7a65febbe3f919a03a78e470549f0ffd5226316690a
|
3 |
+
size 535703734
|
checkpoint-1000/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b5373e04441bec36f1b43b1b54f5c0167cc1ae9b51f30db30f1653080dec069
|
3 |
+
size 267862159
|
checkpoint-1000/rng_state.pth
ADDED
Binary file (14.6 kB). View file
|
|
checkpoint-1000/scheduler.pt
ADDED
Binary file (623 Bytes). View file
|
|
checkpoint-1000/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
checkpoint-1000/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-1000/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased", "tokenizer_class": "DistilBertTokenizer"}
|
checkpoint-1000/trainer_state.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.6397952655150352,
|
5 |
+
"global_step": 1000,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.32,
|
12 |
+
"learning_rate": 1.872040946896993e-05,
|
13 |
+
"loss": 0.3165,
|
14 |
+
"step": 500
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.64,
|
18 |
+
"learning_rate": 1.744081893793986e-05,
|
19 |
+
"loss": 0.2508,
|
20 |
+
"step": 1000
|
21 |
+
}
|
22 |
+
],
|
23 |
+
"max_steps": 7815,
|
24 |
+
"num_train_epochs": 5,
|
25 |
+
"total_flos": 2100332699784000.0,
|
26 |
+
"trial_name": null,
|
27 |
+
"trial_params": null
|
28 |
+
}
|
checkpoint-1000/training_args.bin
ADDED
Binary file (2.8 kB). View file
|
|
checkpoint-1000/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-1500/config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "distilbert-base-uncased",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"model_type": "distilbert",
|
14 |
+
"n_heads": 12,
|
15 |
+
"n_layers": 6,
|
16 |
+
"pad_token_id": 0,
|
17 |
+
"problem_type": "single_label_classification",
|
18 |
+
"qa_dropout": 0.1,
|
19 |
+
"seq_classif_dropout": 0.2,
|
20 |
+
"sinusoidal_pos_embds": false,
|
21 |
+
"tie_weights_": true,
|
22 |
+
"torch_dtype": "float32",
|
23 |
+
"transformers_version": "4.13.0.dev0",
|
24 |
+
"vocab_size": 30522
|
25 |
+
}
|
checkpoint-1500/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82785261bcd8fd5d590d2f8fcf3be31380284a51a02495b5beb18aabbefc6e34
|
3 |
+
size 535703734
|
checkpoint-1500/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57ea5b2718e0880ecbf85637530a68bf5ba2d46a0a3e353d602541378599722d
|
3 |
+
size 267862159
|
checkpoint-1500/rng_state.pth
ADDED
Binary file (14.6 kB). View file
|
|
checkpoint-1500/scheduler.pt
ADDED
Binary file (623 Bytes). View file
|
|
checkpoint-1500/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
checkpoint-1500/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-1500/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased", "tokenizer_class": "DistilBertTokenizer"}
|
checkpoint-1500/trainer_state.json
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9596928982725528,
|
5 |
+
"global_step": 1500,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.32,
|
12 |
+
"learning_rate": 1.872040946896993e-05,
|
13 |
+
"loss": 0.3165,
|
14 |
+
"step": 500
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.64,
|
18 |
+
"learning_rate": 1.744081893793986e-05,
|
19 |
+
"loss": 0.2508,
|
20 |
+
"step": 1000
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 0.96,
|
24 |
+
"learning_rate": 1.616122840690979e-05,
|
25 |
+
"loss": 0.2268,
|
26 |
+
"step": 1500
|
27 |
+
}
|
28 |
+
],
|
29 |
+
"max_steps": 7815,
|
30 |
+
"num_train_epochs": 5,
|
31 |
+
"total_flos": 3150811589944704.0,
|
32 |
+
"trial_name": null,
|
33 |
+
"trial_params": null
|
34 |
+
}
|
checkpoint-1500/training_args.bin
ADDED
Binary file (2.8 kB). View file
|
|
checkpoint-1500/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-2000/config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "distilbert-base-uncased",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"model_type": "distilbert",
|
14 |
+
"n_heads": 12,
|
15 |
+
"n_layers": 6,
|
16 |
+
"pad_token_id": 0,
|
17 |
+
"problem_type": "single_label_classification",
|
18 |
+
"qa_dropout": 0.1,
|
19 |
+
"seq_classif_dropout": 0.2,
|
20 |
+
"sinusoidal_pos_embds": false,
|
21 |
+
"tie_weights_": true,
|
22 |
+
"torch_dtype": "float32",
|
23 |
+
"transformers_version": "4.13.0.dev0",
|
24 |
+
"vocab_size": 30522
|
25 |
+
}
|
checkpoint-2000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ada633d4e891b7e174167d603b755ba4ee2726d72248590366b5df08a8ae6ddf
|
3 |
+
size 535703734
|
checkpoint-2000/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2848ad231f58ff0e4c095178198bf5b1012192e98a9ae9b864cf4123f1402cbd
|
3 |
+
size 267862159
|
checkpoint-2000/rng_state.pth
ADDED
Binary file (14.6 kB). View file
|
|
checkpoint-2000/scheduler.pt
ADDED
Binary file (623 Bytes). View file
|
|
checkpoint-2000/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
checkpoint-2000/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-2000/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased", "tokenizer_class": "DistilBertTokenizer"}
|
checkpoint-2000/trainer_state.json
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.2795905310300704,
|
5 |
+
"global_step": 2000,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.32,
|
12 |
+
"learning_rate": 1.872040946896993e-05,
|
13 |
+
"loss": 0.3165,
|
14 |
+
"step": 500
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.64,
|
18 |
+
"learning_rate": 1.744081893793986e-05,
|
19 |
+
"loss": 0.2508,
|
20 |
+
"step": 1000
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 0.96,
|
24 |
+
"learning_rate": 1.616122840690979e-05,
|
25 |
+
"loss": 0.2268,
|
26 |
+
"step": 1500
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"epoch": 1.28,
|
30 |
+
"learning_rate": 1.488163787587972e-05,
|
31 |
+
"loss": 0.1718,
|
32 |
+
"step": 2000
|
33 |
+
}
|
34 |
+
],
|
35 |
+
"max_steps": 7815,
|
36 |
+
"num_train_epochs": 5,
|
37 |
+
"total_flos": 4193866096371360.0,
|
38 |
+
"trial_name": null,
|
39 |
+
"trial_params": null
|
40 |
+
}
|
checkpoint-2000/training_args.bin
ADDED
Binary file (2.8 kB). View file
|
|
checkpoint-2000/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-2500/config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "distilbert-base-uncased",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"model_type": "distilbert",
|
14 |
+
"n_heads": 12,
|
15 |
+
"n_layers": 6,
|
16 |
+
"pad_token_id": 0,
|
17 |
+
"problem_type": "single_label_classification",
|
18 |
+
"qa_dropout": 0.1,
|
19 |
+
"seq_classif_dropout": 0.2,
|
20 |
+
"sinusoidal_pos_embds": false,
|
21 |
+
"tie_weights_": true,
|
22 |
+
"torch_dtype": "float32",
|
23 |
+
"transformers_version": "4.13.0.dev0",
|
24 |
+
"vocab_size": 30522
|
25 |
+
}
|
checkpoint-2500/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f5bfd363b18260da50c9aaaff5b38ded1fcb935ea57abf639b2db9bf3e94894
|
3 |
+
size 535703734
|
checkpoint-2500/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:713888053243ba822f673ec9bb61bee035beefd371a31ce9637b668d4c196fd0
|
3 |
+
size 267862159
|
checkpoint-2500/rng_state.pth
ADDED
Binary file (14.6 kB). View file
|
|
checkpoint-2500/scheduler.pt
ADDED
Binary file (623 Bytes). View file
|
|
checkpoint-2500/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
checkpoint-2500/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-2500/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased", "tokenizer_class": "DistilBertTokenizer"}
|
checkpoint-2500/trainer_state.json
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.599488163787588,
|
5 |
+
"global_step": 2500,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.32,
|
12 |
+
"learning_rate": 1.872040946896993e-05,
|
13 |
+
"loss": 0.3165,
|
14 |
+
"step": 500
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.64,
|
18 |
+
"learning_rate": 1.744081893793986e-05,
|
19 |
+
"loss": 0.2508,
|
20 |
+
"step": 1000
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"epoch": 0.96,
|
24 |
+
"learning_rate": 1.616122840690979e-05,
|
25 |
+
"loss": 0.2268,
|
26 |
+
"step": 1500
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"epoch": 1.28,
|
30 |
+
"learning_rate": 1.488163787587972e-05,
|
31 |
+
"loss": 0.1718,
|
32 |
+
"step": 2000
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"epoch": 1.6,
|
36 |
+
"learning_rate": 1.3602047344849649e-05,
|
37 |
+
"loss": 0.1513,
|
38 |
+
"step": 2500
|
39 |
+
}
|
40 |
+
],
|
41 |
+
"max_steps": 7815,
|
42 |
+
"num_train_epochs": 5,
|
43 |
+
"total_flos": 5244957648250848.0,
|
44 |
+
"trial_name": null,
|
45 |
+
"trial_params": null
|
46 |
+
}
|
checkpoint-2500/training_args.bin
ADDED
Binary file (2.8 kB). View file
|
|
checkpoint-2500/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-3000/config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "distilbert-base-uncased",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"model_type": "distilbert",
|
14 |
+
"n_heads": 12,
|
15 |
+
"n_layers": 6,
|
16 |
+
"pad_token_id": 0,
|
17 |
+
"problem_type": "single_label_classification",
|
18 |
+
"qa_dropout": 0.1,
|
19 |
+
"seq_classif_dropout": 0.2,
|
20 |
+
"sinusoidal_pos_embds": false,
|
21 |
+
"tie_weights_": true,
|
22 |
+
"torch_dtype": "float32",
|
23 |
+
"transformers_version": "4.13.0.dev0",
|
24 |
+
"vocab_size": 30522
|
25 |
+
}
|
checkpoint-3000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:def467cfc678d17099532ea2048fcb80128927260f229731756452379dd9f140
|
3 |
+
size 535703734
|
checkpoint-3000/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fc0511c28da4071afe36a515e99ff908f5376ad2a6e7a6cc27c0afe9f8a6eeb
|
3 |
+
size 267862159
|
checkpoint-3000/rng_state.pth
ADDED
Binary file (14.6 kB). View file
|
|
checkpoint-3000/scheduler.pt
ADDED
Binary file (623 Bytes). View file
|
|