sontn122 committed
Commit cc179f3
1 Parent(s): fe1cbf3

End of training
README.md ADDED
@@ -0,0 +1,62 @@
+ ---
+ license: mit
+ base_model: microsoft/deberta-v3-large
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: microsoft/deberta-v3-large
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # microsoft/deberta-v3-large
+
+ This model is a fine-tuned version of [microsoft/deberta-v3-large](https://huggingface.co/microsoft/deberta-v3-large) on an unspecified dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 1.6094
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-05
+ - train_batch_size: 8
+ - eval_batch_size: 2
+ - seed: 42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 6
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:-----:|:-----:|:---------------:|
+ | 1.6123 | 1.0 | 3550 | 1.6094 |
+ | 1.6124 | 2.0 | 7100 | 1.6094 |
+ | 1.6106 | 3.0 | 10650 | 1.6094 |
+ | 1.6107 | 4.0 | 14200 | 1.6094 |
+ | 1.6104 | 5.0 | 17750 | 1.6094 |
+ | 1.6115 | 6.0 | 21300 | 1.6094 |
+
+
+ ### Framework versions
+
+ - Transformers 4.32.1
+ - Pytorch 2.0.1+cu118
+ - Datasets 2.14.4
+ - Tokenizers 0.13.3
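The hyperparameters listed in the card map one-to-one onto `transformers.TrainingArguments`. Note that the validation loss sits at 1.6094 ≈ ln 5 for all six epochs, the cross-entropy of a uniform guess over five answer choices, which suggests the multiple-choice head never learned to separate the options. A minimal sketch of the configuration, assuming the Trainer API from the listed Transformers 4.32.1 (the output directory and evaluation strategy are assumptions, not taken from the card):

```python
from transformers import TrainingArguments

# Sketch only: reproduces the hyperparameters reported in the model card.
training_args = TrainingArguments(
    output_dir="deberta-v3-large-mc",  # assumed; not stated in the card
    learning_rate=5e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=2,
    seed=42,
    lr_scheduler_type="linear",
    num_train_epochs=6,
    adam_beta1=0.9,              # Adam betas/epsilon as listed;
    adam_beta2=0.999,            # these are also the library defaults
    adam_epsilon=1e-8,
    evaluation_strategy="epoch", # assumed from the per-epoch results table
)
```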
added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "[MASK]": 128000
+ }
config.json ADDED
@@ -0,0 +1,35 @@
+ {
+   "_name_or_path": "microsoft/deberta-v3-large",
+   "architectures": [
+     "DebertaV2ForMultipleChoice"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta-v2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 1024,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.32.1",
+   "type_vocab_size": 0,
+   "vocab_size": 128100
+ }
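The `DebertaV2ForMultipleChoice` head above scores each (question, choice) pair and expects inputs of shape `(batch, num_choices, seq_len)`. A hedged sketch of loading this checkpoint and ranking five options (the local path and example texts are placeholders, not from this repo):

```python
import torch
from transformers import AutoModelForMultipleChoice, AutoTokenizer

model_dir = "./deberta-v3-large-mc"  # placeholder for a local checkout
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForMultipleChoice.from_pretrained(model_dir)
model.eval()

question = "Which planet is known as the Red Planet?"  # illustrative only
choices = ["Venus", "Mars", "Jupiter", "Saturn", "Mercury"]

# Pair the question with every choice, then add a batch dimension so each
# tensor has shape (1, num_choices, seq_len), as the MC head expects.
enc = tokenizer([question] * len(choices), choices,
                padding=True, truncation=True, return_tensors="pt")
batch = {k: v.unsqueeze(0) for k, v in enc.items()}

with torch.no_grad():
    logits = model(**batch).logits  # shape (1, num_choices)
print(logits.softmax(-1))
```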
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2a7e2e19366e3ae4fc0df3e0fbeea6a1ece6597a5764d20b640cd4e718ecea10
+ size 1740387701
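The `.bin` entry is only a Git LFS pointer; the ~1.7 GB weight file itself lives in LFS storage. One way to fetch the actual weights is via `huggingface_hub` (the repo id below is a placeholder, since this diff does not name the repository):

```python
from huggingface_hub import hf_hub_download

# repo_id is hypothetical; substitute the real Hub repository name.
weights_path = hf_hub_download(
    repo_id="sontn122/<repo-name>",
    filename="pytorch_model.bin",
    revision="cc179f3",  # the commit shown above
)
```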
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "bos_token": "[CLS]",
+   "cls_token": "[CLS]",
+   "eos_token": "[SEP]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
spm.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+ size 2464616
submission.csv ADDED
@@ -0,0 +1,201 @@
+ id,prediction
+ 0,C D A
+ 1,B C E
+ 2,A B C
+ 3,D A B
+ 4,A C E
+ 5,B C D
+ 6,A B C
+ 7,A D B
+ 8,B D A
+ 9,A B E
+ 10,A B C
+ 11,B D E
+ 12,A B C
+ 13,A B C
+ 14,B D E
+ 15,A C D
+ 16,A B C
+ 17,A C D
+ 18,A B D
+ 19,D E A
+ 20,E A B
+ 21,D E A
+ 22,B C D
+ 23,A D B
+ 24,A B C
+ 25,A C E
+ 26,A C B
+ 27,C E A
+ 28,A B C
+ 29,A B C
+ 30,A B C
+ 31,B E A
+ 32,A B E
+ 33,C E A
+ 34,A D B
+ 35,A B C
+ 36,B C D
+ 37,B C D
+ 38,B C D
+ 39,B C D
+ 40,A C D
+ 41,A B C
+ 42,D A B
+ 43,A B C
+ 44,A B C
+ 45,A B C
+ 46,A E C
+ 47,A D E
+ 48,A B C
+ 49,A B C
+ 50,E A B
+ 51,B C E
+ 52,A B C
+ 53,A B E
+ 54,A C B
+ 55,C D A
+ 56,C D A
+ 57,A B C
+ 58,B C D
+ 59,A B E
+ 60,C D A
+ 61,C E A
+ 62,A C E
+ 63,C D E
+ 64,C D E
+ 65,B D E
+ 66,A B D
+ 67,A C D
+ 68,E B C
+ 69,A B C
+ 70,A B D
+ 71,B D E
+ 72,D E A
+ 73,A C D
+ 74,D E A
+ 75,B D A
+ 76,D E A
+ 77,B D A
+ 78,D A B
+ 79,A B E
+ 80,C A B
+ 81,A C E
+ 82,A B D
+ 83,C D A
+ 84,D E A
+ 85,A B C
+ 86,A C D
+ 87,B C D
+ 88,B E A
+ 89,A B D
+ 90,A B C
+ 91,B C E
+ 92,A E B
+ 93,A C D
+ 94,A C D
+ 95,B C D
+ 96,D E A
+ 97,C D E
+ 98,A B C
+ 99,A D E
+ 100,A D E
+ 101,A B E
+ 102,A D E
+ 103,A B C
+ 104,C D E
+ 105,A B D
+ 106,A B E
+ 107,B C D
+ 108,A B C
+ 109,A B C
+ 110,A B C
+ 111,E A B
+ 112,A B C
+ 113,B C E
+ 114,C D A
+ 115,D A B
+ 116,C D E
+ 117,A B C
+ 118,A B C
+ 119,A B C
+ 120,C D E
+ 121,A C D
+ 122,A C D
+ 123,A C B
+ 124,A C E
+ 125,B E A
+ 126,A D E
+ 127,A B D
+ 128,A B D
+ 129,A B C
+ 130,A C B
+ 131,A B D
+ 132,B C D
+ 133,A B C
+ 134,A B D
+ 135,A D E
+ 136,C A B
+ 137,B C D
+ 138,A C D
+ 139,A D B
+ 140,A B C
+ 141,B E A
+ 142,A C D
+ 143,A B C
+ 144,A B C
+ 145,A B D
+ 146,B C D
+ 147,D A B
+ 148,B C D
+ 149,B A C
+ 150,A C B
+ 151,A C D
+ 152,A E B
+ 153,A B D
+ 154,B C D
+ 155,D E A
+ 156,E A B
+ 157,A B C
+ 158,A B C
+ 159,A C B
+ 160,B C D
+ 161,A B C
+ 162,A B D
+ 163,A B C
+ 164,A D B
+ 165,A B D
+ 166,B C E
+ 167,A B D
+ 168,E A C
+ 169,C D E
+ 170,B C D
+ 171,A E B
+ 172,A B C
+ 173,C A B
+ 174,A D E
+ 175,C D E
+ 176,B C E
+ 177,B C D
+ 178,C A B
+ 179,E A B
+ 180,A B C
+ 181,A B C
+ 182,A B C
+ 183,A D E
+ 184,A B C
+ 185,B C D
+ 186,D A B
+ 187,A B C
+ 188,A C B
+ 189,A B C
+ 190,B A C
+ 191,A B C
+ 192,B C A
+ 193,A B C
+ 194,A D B
+ 195,A C D
+ 196,A C D
+ 197,A B C
+ 198,E A B
+ 199,A B C
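Each row of `submission.csv` lists the three highest-ranked answer letters in confidence order, the usual format for MAP@3-scored multiple-choice competitions. A sketch of how such a file could be written from per-question logits (the `all_logits` array and file names are assumptions):

```python
import numpy as np
import pandas as pd

# Assumed input: one row of five choice logits per question, e.g. collected
# from the model sketch above.
all_logits = np.load("logits.npy")  # placeholder source, shape (n, 5)

letters = np.array(list("ABCDE"))
top3 = letters[np.argsort(-all_logits, axis=1)[:, :3]]  # best three per row

pd.DataFrame({
    "id": np.arange(len(top3)),
    "prediction": [" ".join(row) for row in top3],
}).to_csv("submission.csv", index=False)
```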
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "bos_token": "[CLS]",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": false,
+   "eos_token": "[SEP]",
+   "mask_token": "[MASK]",
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "sp_model_kwargs": {},
+   "split_by_punct": false,
+   "tokenizer_class": "DebertaV2Tokenizer",
+   "unk_token": "[UNK]",
+   "vocab_type": "spm"
+ }
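The oversized `model_max_length` (about 1e30) is the `transformers` sentinel written when a checkpoint registers no maximum length, so truncation will not kick in automatically. Since `config.json` sets `max_position_embeddings` to 512, it is safer to cap the length explicitly when tokenizing (a sketch; the path is a placeholder):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./deberta-v3-large-mc")  # placeholder
enc = tokenizer(
    "some example text",
    max_length=512,  # matches max_position_embeddings in config.json
    truncation=True,
)
```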
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:61a003149df66223efefce4a4da29df72d1bdded3739898f170fc86a70ebe274
+ size 4027