Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- deberta-v3-finetuned/fold_0/0/checkpoint-500/config.json +42 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/optimizer.pt +3 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/pytorch_model.bin +3 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/rng_state.pth +3 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/scheduler.pt +3 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/special_tokens_map.json +9 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/tokenizer.json +0 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/tokenizer_config.json +16 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/trainer_state.json +70 -0
- deberta-v3-finetuned/fold_0/0/checkpoint-500/training_args.bin +3 -0
- deberta-v3-finetuned/fold_0/config.json +42 -0
- deberta-v3-finetuned/fold_0/pytorch_model.bin +3 -0
- deberta-v3-finetuned/fold_0/special_tokens_map.json +9 -0
- deberta-v3-finetuned/fold_0/tokenizer.json +0 -0
- deberta-v3-finetuned/fold_0/tokenizer_config.json +16 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/config.json +42 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/optimizer.pt +3 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/pytorch_model.bin +3 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/rng_state.pth +3 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/scheduler.pt +3 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/special_tokens_map.json +9 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/tokenizer.json +0 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/tokenizer_config.json +16 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/trainer_state.json +121 -0
- deberta-v3-finetuned/fold_1/1/checkpoint-1000/training_args.bin +3 -0
- deberta-v3-finetuned/fold_1/config.json +42 -0
- deberta-v3-finetuned/fold_1/pytorch_model.bin +3 -0
- deberta-v3-finetuned/fold_1/special_tokens_map.json +9 -0
- deberta-v3-finetuned/fold_1/tokenizer.json +0 -0
- deberta-v3-finetuned/fold_1/tokenizer_config.json +16 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/config.json +42 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/optimizer.pt +3 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/pytorch_model.bin +3 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/rng_state.pth +3 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/scheduler.pt +3 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/special_tokens_map.json +9 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/tokenizer.json +0 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/tokenizer_config.json +16 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/trainer_state.json +55 -0
- deberta-v3-finetuned/fold_2/2/checkpoint-400/training_args.bin +3 -0
- deberta-v3-finetuned/fold_2/config.json +42 -0
- deberta-v3-finetuned/fold_2/pytorch_model.bin +3 -0
- deberta-v3-finetuned/fold_2/special_tokens_map.json +9 -0
- deberta-v3-finetuned/fold_2/tokenizer.json +0 -0
- deberta-v3-finetuned/fold_2/tokenizer_config.json +16 -0
- deberta-v3-finetuned/fold_3/3/checkpoint-100/config.json +42 -0
- deberta-v3-finetuned/fold_3/3/checkpoint-100/optimizer.pt +3 -0
- deberta-v3-finetuned/fold_3/3/checkpoint-100/pytorch_model.bin +3 -0
- deberta-v3-finetuned/fold_3/3/checkpoint-100/rng_state.pth +3 -0
- deberta-v3-finetuned/fold_3/3/checkpoint-100/scheduler.pt +3 -0
deberta-v3-finetuned/fold_0/0/checkpoint-500/config.json
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
|
3 |
+
"architectures": [
|
4 |
+
"DebertaV2ForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.007,
|
7 |
+
"hidden_act": "gelu",
|
8 |
+
"hidden_dropout_prob": 0.007,
|
9 |
+
"hidden_size": 1024,
|
10 |
+
"id2label": {
|
11 |
+
"0": "LABEL_0"
|
12 |
+
},
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 4096,
|
15 |
+
"label2id": {
|
16 |
+
"LABEL_0": 0
|
17 |
+
},
|
18 |
+
"layer_norm_eps": 1e-07,
|
19 |
+
"max_position_embeddings": 512,
|
20 |
+
"max_relative_positions": -1,
|
21 |
+
"model_type": "deberta-v2",
|
22 |
+
"norm_rel_ebd": "layer_norm",
|
23 |
+
"num_attention_heads": 16,
|
24 |
+
"num_hidden_layers": 24,
|
25 |
+
"pad_token_id": 0,
|
26 |
+
"pooler_dropout": 0,
|
27 |
+
"pooler_hidden_act": "gelu",
|
28 |
+
"pooler_hidden_size": 1024,
|
29 |
+
"pos_att_type": [
|
30 |
+
"p2c",
|
31 |
+
"c2p"
|
32 |
+
],
|
33 |
+
"position_biased_input": false,
|
34 |
+
"position_buckets": 256,
|
35 |
+
"problem_type": "regression",
|
36 |
+
"relative_attention": true,
|
37 |
+
"share_att_key": true,
|
38 |
+
"torch_dtype": "float32",
|
39 |
+
"transformers_version": "4.32.1",
|
40 |
+
"type_vocab_size": 0,
|
41 |
+
"vocab_size": 128100
|
42 |
+
}
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ce4b3555aaabd6ebf364eef18675e77db495a9c88a5af48454ebfefcddf1b52
|
3 |
+
size 3480831547
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdc5261521c4981b187281dea24a24d8290fd2c4e222a5efff1cf0e9234c7cf5
|
3 |
+
size 1740387701
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51783737039aaae77df9f6cc876318bdb54431cf6e9bffdfbb995a59239ef270
|
3 |
+
size 14575
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af2fa603561d1610ba73b457cac52ea6a0ab7ffa9c9c41a75a141811fc0185a3
|
3 |
+
size 627
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/special_tokens_map.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "[CLS]",
|
3 |
+
"cls_token": "[CLS]",
|
4 |
+
"eos_token": "[SEP]",
|
5 |
+
"mask_token": "[MASK]",
|
6 |
+
"pad_token": "[PAD]",
|
7 |
+
"sep_token": "[SEP]",
|
8 |
+
"unk_token": "[UNK]"
|
9 |
+
}
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/tokenizer_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "[CLS]",
|
3 |
+
"clean_up_tokenization_spaces": true,
|
4 |
+
"cls_token": "[CLS]",
|
5 |
+
"do_lower_case": false,
|
6 |
+
"eos_token": "[SEP]",
|
7 |
+
"mask_token": "[MASK]",
|
8 |
+
"model_max_length": 1000000000000000019884624838656,
|
9 |
+
"pad_token": "[PAD]",
|
10 |
+
"sep_token": "[SEP]",
|
11 |
+
"sp_model_kwargs": {},
|
12 |
+
"split_by_punct": false,
|
13 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
14 |
+
"unk_token": "[UNK]",
|
15 |
+
"vocab_type": "spm"
|
16 |
+
}
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/trainer_state.json
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.4025963544845581,
|
3 |
+
"best_model_checkpoint": "/gpfs/home/jc3821/kaggle/content/deberta-v3-finetuned/fold_0/0/checkpoint-500",
|
4 |
+
"epoch": 1.953125,
|
5 |
+
"eval_steps": 100,
|
6 |
+
"global_step": 500,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.39,
|
13 |
+
"eval_loss": 0.20960840582847595,
|
14 |
+
"eval_rmse": 0.4578301012516022,
|
15 |
+
"eval_runtime": 22.7398,
|
16 |
+
"eval_samples_per_second": 90.458,
|
17 |
+
"eval_steps_per_second": 11.346,
|
18 |
+
"step": 100
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"epoch": 0.78,
|
22 |
+
"eval_loss": 0.23187659680843353,
|
23 |
+
"eval_rmse": 0.4815356731414795,
|
24 |
+
"eval_runtime": 22.6916,
|
25 |
+
"eval_samples_per_second": 90.65,
|
26 |
+
"eval_steps_per_second": 11.37,
|
27 |
+
"step": 200
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"epoch": 1.17,
|
31 |
+
"eval_loss": 0.16903835535049438,
|
32 |
+
"eval_rmse": 0.4111427366733551,
|
33 |
+
"eval_runtime": 22.6924,
|
34 |
+
"eval_samples_per_second": 90.647,
|
35 |
+
"eval_steps_per_second": 11.369,
|
36 |
+
"step": 300
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"epoch": 1.56,
|
40 |
+
"eval_loss": 0.3062863349914551,
|
41 |
+
"eval_rmse": 0.5534313917160034,
|
42 |
+
"eval_runtime": 22.6866,
|
43 |
+
"eval_samples_per_second": 90.67,
|
44 |
+
"eval_steps_per_second": 11.372,
|
45 |
+
"step": 400
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 1.95,
|
49 |
+
"learning_rate": 9.140625e-06,
|
50 |
+
"loss": 0.2205,
|
51 |
+
"step": 500
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 1.95,
|
55 |
+
"eval_loss": 0.16208384931087494,
|
56 |
+
"eval_rmse": 0.4025963544845581,
|
57 |
+
"eval_runtime": 22.6795,
|
58 |
+
"eval_samples_per_second": 90.699,
|
59 |
+
"eval_steps_per_second": 11.376,
|
60 |
+
"step": 500
|
61 |
+
}
|
62 |
+
],
|
63 |
+
"logging_steps": 500,
|
64 |
+
"max_steps": 1280,
|
65 |
+
"num_train_epochs": 5,
|
66 |
+
"save_steps": 100,
|
67 |
+
"total_flos": 5457160921939152.0,
|
68 |
+
"trial_name": null,
|
69 |
+
"trial_params": null
|
70 |
+
}
|
deberta-v3-finetuned/fold_0/0/checkpoint-500/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0439c9395e86496a1acd5f3dc9d68a57fea982eccfb5766d700c8191ec8b133
|
3 |
+
size 4091
|
deberta-v3-finetuned/fold_0/config.json
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
|
3 |
+
"architectures": [
|
4 |
+
"DebertaV2ForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.007,
|
7 |
+
"hidden_act": "gelu",
|
8 |
+
"hidden_dropout_prob": 0.007,
|
9 |
+
"hidden_size": 1024,
|
10 |
+
"id2label": {
|
11 |
+
"0": "LABEL_0"
|
12 |
+
},
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 4096,
|
15 |
+
"label2id": {
|
16 |
+
"LABEL_0": 0
|
17 |
+
},
|
18 |
+
"layer_norm_eps": 1e-07,
|
19 |
+
"max_position_embeddings": 512,
|
20 |
+
"max_relative_positions": -1,
|
21 |
+
"model_type": "deberta-v2",
|
22 |
+
"norm_rel_ebd": "layer_norm",
|
23 |
+
"num_attention_heads": 16,
|
24 |
+
"num_hidden_layers": 24,
|
25 |
+
"pad_token_id": 0,
|
26 |
+
"pooler_dropout": 0,
|
27 |
+
"pooler_hidden_act": "gelu",
|
28 |
+
"pooler_hidden_size": 1024,
|
29 |
+
"pos_att_type": [
|
30 |
+
"p2c",
|
31 |
+
"c2p"
|
32 |
+
],
|
33 |
+
"position_biased_input": false,
|
34 |
+
"position_buckets": 256,
|
35 |
+
"problem_type": "regression",
|
36 |
+
"relative_attention": true,
|
37 |
+
"share_att_key": true,
|
38 |
+
"torch_dtype": "float32",
|
39 |
+
"transformers_version": "4.32.1",
|
40 |
+
"type_vocab_size": 0,
|
41 |
+
"vocab_size": 128100
|
42 |
+
}
|
deberta-v3-finetuned/fold_0/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdc5261521c4981b187281dea24a24d8290fd2c4e222a5efff1cf0e9234c7cf5
|
3 |
+
size 1740387701
|
deberta-v3-finetuned/fold_0/special_tokens_map.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "[CLS]",
|
3 |
+
"cls_token": "[CLS]",
|
4 |
+
"eos_token": "[SEP]",
|
5 |
+
"mask_token": "[MASK]",
|
6 |
+
"pad_token": "[PAD]",
|
7 |
+
"sep_token": "[SEP]",
|
8 |
+
"unk_token": "[UNK]"
|
9 |
+
}
|
deberta-v3-finetuned/fold_0/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
deberta-v3-finetuned/fold_0/tokenizer_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "[CLS]",
|
3 |
+
"clean_up_tokenization_spaces": true,
|
4 |
+
"cls_token": "[CLS]",
|
5 |
+
"do_lower_case": false,
|
6 |
+
"eos_token": "[SEP]",
|
7 |
+
"mask_token": "[MASK]",
|
8 |
+
"model_max_length": 1000000000000000019884624838656,
|
9 |
+
"pad_token": "[PAD]",
|
10 |
+
"sep_token": "[SEP]",
|
11 |
+
"sp_model_kwargs": {},
|
12 |
+
"split_by_punct": false,
|
13 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
14 |
+
"unk_token": "[UNK]",
|
15 |
+
"vocab_type": "spm"
|
16 |
+
}
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/config.json
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
|
3 |
+
"architectures": [
|
4 |
+
"DebertaV2ForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.007,
|
7 |
+
"hidden_act": "gelu",
|
8 |
+
"hidden_dropout_prob": 0.007,
|
9 |
+
"hidden_size": 1024,
|
10 |
+
"id2label": {
|
11 |
+
"0": "LABEL_0"
|
12 |
+
},
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 4096,
|
15 |
+
"label2id": {
|
16 |
+
"LABEL_0": 0
|
17 |
+
},
|
18 |
+
"layer_norm_eps": 1e-07,
|
19 |
+
"max_position_embeddings": 512,
|
20 |
+
"max_relative_positions": -1,
|
21 |
+
"model_type": "deberta-v2",
|
22 |
+
"norm_rel_ebd": "layer_norm",
|
23 |
+
"num_attention_heads": 16,
|
24 |
+
"num_hidden_layers": 24,
|
25 |
+
"pad_token_id": 0,
|
26 |
+
"pooler_dropout": 0,
|
27 |
+
"pooler_hidden_act": "gelu",
|
28 |
+
"pooler_hidden_size": 1024,
|
29 |
+
"pos_att_type": [
|
30 |
+
"p2c",
|
31 |
+
"c2p"
|
32 |
+
],
|
33 |
+
"position_biased_input": false,
|
34 |
+
"position_buckets": 256,
|
35 |
+
"problem_type": "regression",
|
36 |
+
"relative_attention": true,
|
37 |
+
"share_att_key": true,
|
38 |
+
"torch_dtype": "float32",
|
39 |
+
"transformers_version": "4.32.1",
|
40 |
+
"type_vocab_size": 0,
|
41 |
+
"vocab_size": 128100
|
42 |
+
}
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:635821f4fe588432a6200b884f1b1d97ceabcde528ef510f99a3074b07be7eb9
|
3 |
+
size 3480831547
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f2cbd3bab0adc0d8c2db3cdd23f8fc8a30712e8f7908c9a31e7d2da1698518f
|
3 |
+
size 1740387701
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5458d3bcbcf4f0bc302eba1f212281704d725141b083eb254d6baf69117be06c
|
3 |
+
size 14575
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41a3073593c1c4cceb7a03282a47eee18756b89792798e8d3b798cb70f5a3bbe
|
3 |
+
size 627
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/special_tokens_map.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "[CLS]",
|
3 |
+
"cls_token": "[CLS]",
|
4 |
+
"eos_token": "[SEP]",
|
5 |
+
"mask_token": "[MASK]",
|
6 |
+
"pad_token": "[PAD]",
|
7 |
+
"sep_token": "[SEP]",
|
8 |
+
"unk_token": "[UNK]"
|
9 |
+
}
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/tokenizer_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "[CLS]",
|
3 |
+
"clean_up_tokenization_spaces": true,
|
4 |
+
"cls_token": "[CLS]",
|
5 |
+
"do_lower_case": false,
|
6 |
+
"eos_token": "[SEP]",
|
7 |
+
"mask_token": "[MASK]",
|
8 |
+
"model_max_length": 1000000000000000019884624838656,
|
9 |
+
"pad_token": "[PAD]",
|
10 |
+
"sep_token": "[SEP]",
|
11 |
+
"sp_model_kwargs": {},
|
12 |
+
"split_by_punct": false,
|
13 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
14 |
+
"unk_token": "[UNK]",
|
15 |
+
"vocab_type": "spm"
|
16 |
+
}
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/trainer_state.json
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.4920215308666229,
|
3 |
+
"best_model_checkpoint": "/gpfs/home/jc3821/kaggle/content/deberta-v3-finetuned/fold_1/1/checkpoint-1000",
|
4 |
+
"epoch": 3.875968992248062,
|
5 |
+
"eval_steps": 100,
|
6 |
+
"global_step": 1000,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.39,
|
13 |
+
"eval_loss": 0.2649173140525818,
|
14 |
+
"eval_rmse": 0.5147011876106262,
|
15 |
+
"eval_runtime": 31.3445,
|
16 |
+
"eval_samples_per_second": 64.094,
|
17 |
+
"eval_steps_per_second": 8.04,
|
18 |
+
"step": 100
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"epoch": 0.78,
|
22 |
+
"eval_loss": 0.4311714172363281,
|
23 |
+
"eval_rmse": 0.6566364169120789,
|
24 |
+
"eval_runtime": 31.3047,
|
25 |
+
"eval_samples_per_second": 64.176,
|
26 |
+
"eval_steps_per_second": 8.05,
|
27 |
+
"step": 200
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"epoch": 1.16,
|
31 |
+
"eval_loss": 0.3980819880962372,
|
32 |
+
"eval_rmse": 0.630937397480011,
|
33 |
+
"eval_runtime": 31.303,
|
34 |
+
"eval_samples_per_second": 64.179,
|
35 |
+
"eval_steps_per_second": 8.05,
|
36 |
+
"step": 300
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"epoch": 1.55,
|
40 |
+
"eval_loss": 0.2832517623901367,
|
41 |
+
"eval_rmse": 0.5322140455245972,
|
42 |
+
"eval_runtime": 31.2992,
|
43 |
+
"eval_samples_per_second": 64.187,
|
44 |
+
"eval_steps_per_second": 8.051,
|
45 |
+
"step": 400
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 1.94,
|
49 |
+
"learning_rate": 9.186046511627908e-06,
|
50 |
+
"loss": 0.2244,
|
51 |
+
"step": 500
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 1.94,
|
55 |
+
"eval_loss": 0.26878467202186584,
|
56 |
+
"eval_rmse": 0.5184444785118103,
|
57 |
+
"eval_runtime": 31.2847,
|
58 |
+
"eval_samples_per_second": 64.217,
|
59 |
+
"eval_steps_per_second": 8.055,
|
60 |
+
"step": 500
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"epoch": 2.33,
|
64 |
+
"eval_loss": 0.2900581955909729,
|
65 |
+
"eval_rmse": 0.5385705232620239,
|
66 |
+
"eval_runtime": 31.3051,
|
67 |
+
"eval_samples_per_second": 64.175,
|
68 |
+
"eval_steps_per_second": 8.05,
|
69 |
+
"step": 600
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"epoch": 2.71,
|
73 |
+
"eval_loss": 0.3477973937988281,
|
74 |
+
"eval_rmse": 0.5897434949874878,
|
75 |
+
"eval_runtime": 31.2983,
|
76 |
+
"eval_samples_per_second": 64.189,
|
77 |
+
"eval_steps_per_second": 8.052,
|
78 |
+
"step": 700
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"epoch": 3.1,
|
82 |
+
"eval_loss": 0.27153390645980835,
|
83 |
+
"eval_rmse": 0.5210891366004944,
|
84 |
+
"eval_runtime": 31.2968,
|
85 |
+
"eval_samples_per_second": 64.192,
|
86 |
+
"eval_steps_per_second": 8.052,
|
87 |
+
"step": 800
|
88 |
+
},
|
89 |
+
{
|
90 |
+
"epoch": 3.49,
|
91 |
+
"eval_loss": 0.24941422045230865,
|
92 |
+
"eval_rmse": 0.4994138777256012,
|
93 |
+
"eval_runtime": 31.3146,
|
94 |
+
"eval_samples_per_second": 64.155,
|
95 |
+
"eval_steps_per_second": 8.047,
|
96 |
+
"step": 900
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"epoch": 3.88,
|
100 |
+
"learning_rate": 3.372093023255814e-06,
|
101 |
+
"loss": 0.0945,
|
102 |
+
"step": 1000
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"epoch": 3.88,
|
106 |
+
"eval_loss": 0.24208517372608185,
|
107 |
+
"eval_rmse": 0.4920215308666229,
|
108 |
+
"eval_runtime": 31.2894,
|
109 |
+
"eval_samples_per_second": 64.207,
|
110 |
+
"eval_steps_per_second": 8.054,
|
111 |
+
"step": 1000
|
112 |
+
}
|
113 |
+
],
|
114 |
+
"logging_steps": 500,
|
115 |
+
"max_steps": 1290,
|
116 |
+
"num_train_epochs": 5,
|
117 |
+
"save_steps": 100,
|
118 |
+
"total_flos": 9605995008551688.0,
|
119 |
+
"trial_name": null,
|
120 |
+
"trial_params": null
|
121 |
+
}
|
deberta-v3-finetuned/fold_1/1/checkpoint-1000/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4eadc822c2797edd54906ce4e0b7f9ef7987f46ee2743298e1f123e0e41dc785
|
3 |
+
size 4091
|
deberta-v3-finetuned/fold_1/config.json
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
|
3 |
+
"architectures": [
|
4 |
+
"DebertaV2ForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.007,
|
7 |
+
"hidden_act": "gelu",
|
8 |
+
"hidden_dropout_prob": 0.007,
|
9 |
+
"hidden_size": 1024,
|
10 |
+
"id2label": {
|
11 |
+
"0": "LABEL_0"
|
12 |
+
},
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 4096,
|
15 |
+
"label2id": {
|
16 |
+
"LABEL_0": 0
|
17 |
+
},
|
18 |
+
"layer_norm_eps": 1e-07,
|
19 |
+
"max_position_embeddings": 512,
|
20 |
+
"max_relative_positions": -1,
|
21 |
+
"model_type": "deberta-v2",
|
22 |
+
"norm_rel_ebd": "layer_norm",
|
23 |
+
"num_attention_heads": 16,
|
24 |
+
"num_hidden_layers": 24,
|
25 |
+
"pad_token_id": 0,
|
26 |
+
"pooler_dropout": 0,
|
27 |
+
"pooler_hidden_act": "gelu",
|
28 |
+
"pooler_hidden_size": 1024,
|
29 |
+
"pos_att_type": [
|
30 |
+
"p2c",
|
31 |
+
"c2p"
|
32 |
+
],
|
33 |
+
"position_biased_input": false,
|
34 |
+
"position_buckets": 256,
|
35 |
+
"problem_type": "regression",
|
36 |
+
"relative_attention": true,
|
37 |
+
"share_att_key": true,
|
38 |
+
"torch_dtype": "float32",
|
39 |
+
"transformers_version": "4.32.1",
|
40 |
+
"type_vocab_size": 0,
|
41 |
+
"vocab_size": 128100
|
42 |
+
}
|
deberta-v3-finetuned/fold_1/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f2cbd3bab0adc0d8c2db3cdd23f8fc8a30712e8f7908c9a31e7d2da1698518f
|
3 |
+
size 1740387701
|
deberta-v3-finetuned/fold_1/special_tokens_map.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "[CLS]",
|
3 |
+
"cls_token": "[CLS]",
|
4 |
+
"eos_token": "[SEP]",
|
5 |
+
"mask_token": "[MASK]",
|
6 |
+
"pad_token": "[PAD]",
|
7 |
+
"sep_token": "[SEP]",
|
8 |
+
"unk_token": "[UNK]"
|
9 |
+
}
|
deberta-v3-finetuned/fold_1/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
deberta-v3-finetuned/fold_1/tokenizer_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "[CLS]",
|
3 |
+
"clean_up_tokenization_spaces": true,
|
4 |
+
"cls_token": "[CLS]",
|
5 |
+
"do_lower_case": false,
|
6 |
+
"eos_token": "[SEP]",
|
7 |
+
"mask_token": "[MASK]",
|
8 |
+
"model_max_length": 1000000000000000019884624838656,
|
9 |
+
"pad_token": "[PAD]",
|
10 |
+
"sep_token": "[SEP]",
|
11 |
+
"sp_model_kwargs": {},
|
12 |
+
"split_by_punct": false,
|
13 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
14 |
+
"unk_token": "[UNK]",
|
15 |
+
"vocab_type": "spm"
|
16 |
+
}
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/config.json
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
|
3 |
+
"architectures": [
|
4 |
+
"DebertaV2ForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.007,
|
7 |
+
"hidden_act": "gelu",
|
8 |
+
"hidden_dropout_prob": 0.007,
|
9 |
+
"hidden_size": 1024,
|
10 |
+
"id2label": {
|
11 |
+
"0": "LABEL_0"
|
12 |
+
},
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 4096,
|
15 |
+
"label2id": {
|
16 |
+
"LABEL_0": 0
|
17 |
+
},
|
18 |
+
"layer_norm_eps": 1e-07,
|
19 |
+
"max_position_embeddings": 512,
|
20 |
+
"max_relative_positions": -1,
|
21 |
+
"model_type": "deberta-v2",
|
22 |
+
"norm_rel_ebd": "layer_norm",
|
23 |
+
"num_attention_heads": 16,
|
24 |
+
"num_hidden_layers": 24,
|
25 |
+
"pad_token_id": 0,
|
26 |
+
"pooler_dropout": 0,
|
27 |
+
"pooler_hidden_act": "gelu",
|
28 |
+
"pooler_hidden_size": 1024,
|
29 |
+
"pos_att_type": [
|
30 |
+
"p2c",
|
31 |
+
"c2p"
|
32 |
+
],
|
33 |
+
"position_biased_input": false,
|
34 |
+
"position_buckets": 256,
|
35 |
+
"problem_type": "regression",
|
36 |
+
"relative_attention": true,
|
37 |
+
"share_att_key": true,
|
38 |
+
"torch_dtype": "float32",
|
39 |
+
"transformers_version": "4.32.1",
|
40 |
+
"type_vocab_size": 0,
|
41 |
+
"vocab_size": 128100
|
42 |
+
}
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce44670793ed58c21f8c2cbce6afc4efc891a30dfde7518e85135bd442780680
|
3 |
+
size 3480831547
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d0bb0e3c058a48281a2b64af0f6ecdf014de51e36ea51b0251bfb5cc4c00691
|
3 |
+
size 1740387701
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4e100a81cf298499260f07579dde148991722b91ee300d8212533f095c23a93
|
3 |
+
size 14575
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7cd6229b27eb04441fb524f85b3a83bef58d5a81203fc33e818a099412769c8
|
3 |
+
size 627
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/special_tokens_map.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "[CLS]",
|
3 |
+
"cls_token": "[CLS]",
|
4 |
+
"eos_token": "[SEP]",
|
5 |
+
"mask_token": "[MASK]",
|
6 |
+
"pad_token": "[PAD]",
|
7 |
+
"sep_token": "[SEP]",
|
8 |
+
"unk_token": "[UNK]"
|
9 |
+
}
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/tokenizer_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "[CLS]",
|
3 |
+
"clean_up_tokenization_spaces": true,
|
4 |
+
"cls_token": "[CLS]",
|
5 |
+
"do_lower_case": false,
|
6 |
+
"eos_token": "[SEP]",
|
7 |
+
"mask_token": "[MASK]",
|
8 |
+
"model_max_length": 1000000000000000019884624838656,
|
9 |
+
"pad_token": "[PAD]",
|
10 |
+
"sep_token": "[SEP]",
|
11 |
+
"sp_model_kwargs": {},
|
12 |
+
"split_by_punct": false,
|
13 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
14 |
+
"unk_token": "[UNK]",
|
15 |
+
"vocab_type": "spm"
|
16 |
+
}
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/trainer_state.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.43666166067123413,
|
3 |
+
"best_model_checkpoint": "/gpfs/home/jc3821/kaggle/content/deberta-v3-finetuned/fold_2/2/checkpoint-400",
|
4 |
+
"epoch": 1.5444015444015444,
|
5 |
+
"eval_steps": 100,
|
6 |
+
"global_step": 400,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.39,
|
13 |
+
"eval_loss": 0.31523793935775757,
|
14 |
+
"eval_rmse": 0.561460554599762,
|
15 |
+
"eval_runtime": 28.1344,
|
16 |
+
"eval_samples_per_second": 70.945,
|
17 |
+
"eval_steps_per_second": 8.886,
|
18 |
+
"step": 100
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"epoch": 0.77,
|
22 |
+
"eval_loss": 0.30585768818855286,
|
23 |
+
"eval_rmse": 0.5530440211296082,
|
24 |
+
"eval_runtime": 28.135,
|
25 |
+
"eval_samples_per_second": 70.944,
|
26 |
+
"eval_steps_per_second": 8.886,
|
27 |
+
"step": 200
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"epoch": 1.16,
|
31 |
+
"eval_loss": 0.33985063433647156,
|
32 |
+
"eval_rmse": 0.5829670429229736,
|
33 |
+
"eval_runtime": 28.1345,
|
34 |
+
"eval_samples_per_second": 70.945,
|
35 |
+
"eval_steps_per_second": 8.886,
|
36 |
+
"step": 300
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"epoch": 1.54,
|
40 |
+
"eval_loss": 0.19067342579364777,
|
41 |
+
"eval_rmse": 0.43666166067123413,
|
42 |
+
"eval_runtime": 28.134,
|
43 |
+
"eval_samples_per_second": 70.946,
|
44 |
+
"eval_steps_per_second": 8.886,
|
45 |
+
"step": 400
|
46 |
+
}
|
47 |
+
],
|
48 |
+
"logging_steps": 500,
|
49 |
+
"max_steps": 1295,
|
50 |
+
"num_train_epochs": 5,
|
51 |
+
"save_steps": 100,
|
52 |
+
"total_flos": 4057351609521216.0,
|
53 |
+
"trial_name": null,
|
54 |
+
"trial_params": null
|
55 |
+
}
|
deberta-v3-finetuned/fold_2/2/checkpoint-400/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41d0d43d565b7b3f73ca58f024d9c5d913daab26a6cc112d5207845da19c1431
|
3 |
+
size 4091
|
deberta-v3-finetuned/fold_2/config.json
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
|
3 |
+
"architectures": [
|
4 |
+
"DebertaV2ForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.007,
|
7 |
+
"hidden_act": "gelu",
|
8 |
+
"hidden_dropout_prob": 0.007,
|
9 |
+
"hidden_size": 1024,
|
10 |
+
"id2label": {
|
11 |
+
"0": "LABEL_0"
|
12 |
+
},
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 4096,
|
15 |
+
"label2id": {
|
16 |
+
"LABEL_0": 0
|
17 |
+
},
|
18 |
+
"layer_norm_eps": 1e-07,
|
19 |
+
"max_position_embeddings": 512,
|
20 |
+
"max_relative_positions": -1,
|
21 |
+
"model_type": "deberta-v2",
|
22 |
+
"norm_rel_ebd": "layer_norm",
|
23 |
+
"num_attention_heads": 16,
|
24 |
+
"num_hidden_layers": 24,
|
25 |
+
"pad_token_id": 0,
|
26 |
+
"pooler_dropout": 0,
|
27 |
+
"pooler_hidden_act": "gelu",
|
28 |
+
"pooler_hidden_size": 1024,
|
29 |
+
"pos_att_type": [
|
30 |
+
"p2c",
|
31 |
+
"c2p"
|
32 |
+
],
|
33 |
+
"position_biased_input": false,
|
34 |
+
"position_buckets": 256,
|
35 |
+
"problem_type": "regression",
|
36 |
+
"relative_attention": true,
|
37 |
+
"share_att_key": true,
|
38 |
+
"torch_dtype": "float32",
|
39 |
+
"transformers_version": "4.32.1",
|
40 |
+
"type_vocab_size": 0,
|
41 |
+
"vocab_size": 128100
|
42 |
+
}
|
deberta-v3-finetuned/fold_2/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d0bb0e3c058a48281a2b64af0f6ecdf014de51e36ea51b0251bfb5cc4c00691
|
3 |
+
size 1740387701
|
deberta-v3-finetuned/fold_2/special_tokens_map.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "[CLS]",
|
3 |
+
"cls_token": "[CLS]",
|
4 |
+
"eos_token": "[SEP]",
|
5 |
+
"mask_token": "[MASK]",
|
6 |
+
"pad_token": "[PAD]",
|
7 |
+
"sep_token": "[SEP]",
|
8 |
+
"unk_token": "[UNK]"
|
9 |
+
}
|
deberta-v3-finetuned/fold_2/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
deberta-v3-finetuned/fold_2/tokenizer_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "[CLS]",
|
3 |
+
"clean_up_tokenization_spaces": true,
|
4 |
+
"cls_token": "[CLS]",
|
5 |
+
"do_lower_case": false,
|
6 |
+
"eos_token": "[SEP]",
|
7 |
+
"mask_token": "[MASK]",
|
8 |
+
"model_max_length": 1000000000000000019884624838656,
|
9 |
+
"pad_token": "[PAD]",
|
10 |
+
"sep_token": "[SEP]",
|
11 |
+
"sp_model_kwargs": {},
|
12 |
+
"split_by_punct": false,
|
13 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
14 |
+
"unk_token": "[UNK]",
|
15 |
+
"vocab_type": "spm"
|
16 |
+
}
|
deberta-v3-finetuned/fold_3/3/checkpoint-100/config.json
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/gpfs/home/jc3821/kaggle/microsoft-deberta-v3-large",
|
3 |
+
"architectures": [
|
4 |
+
"DebertaV2ForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.007,
|
7 |
+
"hidden_act": "gelu",
|
8 |
+
"hidden_dropout_prob": 0.007,
|
9 |
+
"hidden_size": 1024,
|
10 |
+
"id2label": {
|
11 |
+
"0": "LABEL_0"
|
12 |
+
},
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 4096,
|
15 |
+
"label2id": {
|
16 |
+
"LABEL_0": 0
|
17 |
+
},
|
18 |
+
"layer_norm_eps": 1e-07,
|
19 |
+
"max_position_embeddings": 512,
|
20 |
+
"max_relative_positions": -1,
|
21 |
+
"model_type": "deberta-v2",
|
22 |
+
"norm_rel_ebd": "layer_norm",
|
23 |
+
"num_attention_heads": 16,
|
24 |
+
"num_hidden_layers": 24,
|
25 |
+
"pad_token_id": 0,
|
26 |
+
"pooler_dropout": 0,
|
27 |
+
"pooler_hidden_act": "gelu",
|
28 |
+
"pooler_hidden_size": 1024,
|
29 |
+
"pos_att_type": [
|
30 |
+
"p2c",
|
31 |
+
"c2p"
|
32 |
+
],
|
33 |
+
"position_biased_input": false,
|
34 |
+
"position_buckets": 256,
|
35 |
+
"problem_type": "regression",
|
36 |
+
"relative_attention": true,
|
37 |
+
"share_att_key": true,
|
38 |
+
"torch_dtype": "float32",
|
39 |
+
"transformers_version": "4.32.1",
|
40 |
+
"type_vocab_size": 0,
|
41 |
+
"vocab_size": 128100
|
42 |
+
}
|
deberta-v3-finetuned/fold_3/3/checkpoint-100/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b20b449044c88ca077f2e4d53ed3e7965841a392bfcc938260acb2ff57021f6c
|
3 |
+
size 3480831547
|
deberta-v3-finetuned/fold_3/3/checkpoint-100/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c9b7956e94bf38e46350de37247628745d4153d0f030e66a40e9099c62a7e70
|
3 |
+
size 1740387701
|
deberta-v3-finetuned/fold_3/3/checkpoint-100/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72b166cc4874ed59847a59cc4b7fa887c9be4c1c2d459fe1b29872f6ec46e8ea
|
3 |
+
size 14575
|
deberta-v3-finetuned/fold_3/3/checkpoint-100/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64a05a810a830e4742e4818737ab479a0943e4c2e2dab122df5475f155021251
|
3 |
+
size 627
|