tanthinhdt committed
Commit
680ad58
1 Parent(s): b129d8e

Training in progress, epoch 0

.gitignore ADDED
@@ -0,0 +1 @@
+ checkpoint-*/
config.json ADDED
@@ -0,0 +1,235 @@
+ {
+ "_name_or_path": "VieSignLang/videomae-small-finetuned-kinetics-finetuned-vsl",
+ "architectures": [
+ "VideoMAEForVideoClassification"
+ ],
+ "attention_probs_dropout_prob": 0.0,
+ "decoder_hidden_size": 192,
+ "decoder_intermediate_size": 768,
+ "decoder_num_attention_heads": 3,
+ "decoder_num_hidden_layers": 12,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.0,
+ "hidden_size": 384,
+ "id2label": {
+ "0": "Con ch\u00f3",
+ "1": "Con m\u00e8o",
+ "2": "Con g\u00e0",
+ "3": "Con v\u1ecbt",
+ "4": "Con r\u00f9a",
+ "5": "Con th\u1ecf",
+ "6": "Con tr\u00e2u",
+ "7": "Con b\u00f2",
+ "8": "Con d\u00ea",
+ "9": "Con heo",
+ "10": "M\u00e0u \u0111en",
+ "11": "M\u00e0u tr\u1eafng",
+ "12": "M\u00e0u \u0111\u1ecf",
+ "13": "M\u00e0u cam",
+ "14": "M\u00e0u v\u00e0ng",
+ "15": "M\u00e0u l\u00e1 c\u00e2y",
+ "16": "M\u00e0u da tr\u1eddi",
+ "17": "M\u00e0u h\u1ed3ng",
+ "18": "M\u00e0u t\u00edm",
+ "19": "M\u00e0u n\u00e2u",
+ "20": "Qu\u1ea3 d\u00e2u",
+ "21": "Qu\u1ea3 m\u1eadn",
+ "22": "Qu\u1ea3 d\u1ee9a",
+ "23": "Qu\u1ea3 \u0111\u00e0o",
+ "24": "Qu\u1ea3 \u0111u \u0111\u1ee7",
+ "25": "Qu\u1ea3 cam",
+ "26": "Qu\u1ea3 b\u01a1",
+ "27": "Qu\u1ea3 chu\u1ed1i",
+ "28": "Qu\u1ea3 xo\u00e0i",
+ "29": "Qu\u1ea3 d\u1eeba",
+ "30": "B\u1ed1",
+ "31": "M\u1eb9",
+ "32": "Con trai",
+ "33": "Con g\u00e1i",
+ "34": "V\u1ee3",
+ "35": "Ch\u1ed3ng",
+ "36": "\u00d4ng n\u1ed9i",
+ "37": "B\u00e0 n\u1ed9i",
+ "38": "\u00d4ng ngo\u1ea1i",
+ "39": "B\u00e0 ngo\u1ea1i",
+ "40": "\u0102n",
+ "41": "U\u1ed1ng",
+ "42": "Xem",
+ "43": "Th\u00e8m",
+ "44": "M\u00e1ch",
+ "45": "Kh\u00f3c",
+ "46": "C\u01b0\u1eddi",
+ "47": "H\u1ecdc",
+ "48": "D\u1ed7i",
+ "49": "Ch\u1ebft",
+ "50": "\u0110i",
+ "51": "Ch\u1ea1y",
+ "52": "B\u1eadn",
+ "53": "H\u00e1t",
+ "54": "M\u00faa",
+ "55": "N\u1ea5u",
+ "56": "N\u01b0\u1edbng",
+ "57": "Nh\u1ea7m l\u1eabn",
+ "58": "Quan s\u00e1t",
+ "59": "C\u1eafm tr\u1ea1i",
+ "60": "Cung c\u1ea5p",
+ "61": "B\u1eaft ch\u01b0\u1edbc",
+ "62": "B\u1eaft bu\u1ed9c",
+ "63": "B\u00e1o c\u00e1o",
+ "64": "Mua b\u00e1n",
+ "65": "Kh\u00f4ng quen",
+ "66": "Kh\u00f4ng n\u00ean",
+ "67": "Kh\u00f4ng c\u1ea7n",
+ "68": "Kh\u00f4ng cho",
+ "69": "Kh\u00f4ng nghe l\u1eddi",
+ "70": "M\u1eb7n",
+ "71": "\u0110\u1eafng",
+ "72": "Cay",
+ "73": "Ng\u1ecdt",
+ "74": "\u0110\u1eadm",
+ "75": "Nh\u1ea1t",
+ "76": "Ngon mi\u1ec7ng",
+ "77": "X\u1ea5u",
+ "78": "\u0110\u1eb9p",
+ "79": "Ch\u1eadt",
+ "80": "H\u1eb9p",
+ "81": "R\u1ed9ng",
+ "82": "D\u00e0i",
+ "83": "Cao",
+ "84": "L\u00f9n",
+ "85": "\u1ed0m",
+ "86": "M\u1eadp",
+ "87": "Ngoan",
+ "88": "H\u01b0",
+ "89": "Kh\u1ecfe",
+ "90": "M\u1ec7t",
+ "91": "\u0110au",
+ "92": "Gi\u1ecfi",
+ "93": "Ch\u0103m ch\u1ec9",
+ "94": "L\u01b0\u1eddi bi\u1ebfng",
+ "95": "T\u1ed1t b\u1ee5ng",
+ "96": "Th\u00fa v\u1ecb",
+ "97": "H\u00e0i h\u01b0\u1edbc",
+ "98": "D\u0169ng c\u1ea3m",
+ "99": "S\u00e1ng t\u1ea1o"
+ },
+ "image_size": 224,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "label2id": {
+ "B\u00e0 ngo\u1ea1i": 39,
+ "B\u00e0 n\u1ed9i": 37,
+ "B\u00e1o c\u00e1o": 63,
+ "B\u1eadn": 52,
+ "B\u1eaft bu\u1ed9c": 62,
+ "B\u1eaft ch\u01b0\u1edbc": 61,
+ "B\u1ed1": 30,
+ "Cao": 83,
+ "Cay": 72,
+ "Ch\u0103m ch\u1ec9": 93,
+ "Ch\u1ea1y": 51,
+ "Ch\u1eadt": 79,
+ "Ch\u1ebft": 49,
+ "Ch\u1ed3ng": 35,
+ "Con b\u00f2": 7,
+ "Con ch\u00f3": 0,
+ "Con d\u00ea": 8,
+ "Con g\u00e0": 2,
+ "Con g\u00e1i": 33,
+ "Con heo": 9,
+ "Con m\u00e8o": 1,
+ "Con r\u00f9a": 4,
+ "Con th\u1ecf": 5,
+ "Con trai": 32,
+ "Con tr\u00e2u": 6,
+ "Con v\u1ecbt": 3,
+ "Cung c\u1ea5p": 60,
+ "C\u01b0\u1eddi": 46,
+ "C\u1eafm tr\u1ea1i": 59,
+ "D\u00e0i": 82,
+ "D\u0169ng c\u1ea3m": 98,
+ "D\u1ed7i": 48,
+ "Gi\u1ecfi": 92,
+ "H\u00e0i h\u01b0\u1edbc": 97,
+ "H\u00e1t": 53,
+ "H\u01b0": 88,
+ "H\u1eb9p": 80,
+ "H\u1ecdc": 47,
+ "Kh\u00f3c": 45,
+ "Kh\u00f4ng cho": 68,
+ "Kh\u00f4ng c\u1ea7n": 67,
+ "Kh\u00f4ng nghe l\u1eddi": 69,
+ "Kh\u00f4ng n\u00ean": 66,
+ "Kh\u00f4ng quen": 65,
+ "Kh\u1ecfe": 89,
+ "L\u00f9n": 84,
+ "L\u01b0\u1eddi bi\u1ebfng": 94,
+ "Mua b\u00e1n": 64,
+ "M\u00e0u cam": 13,
+ "M\u00e0u da tr\u1eddi": 16,
+ "M\u00e0u h\u1ed3ng": 17,
+ "M\u00e0u l\u00e1 c\u00e2y": 15,
+ "M\u00e0u n\u00e2u": 19,
+ "M\u00e0u tr\u1eafng": 11,
+ "M\u00e0u t\u00edm": 18,
+ "M\u00e0u v\u00e0ng": 14,
+ "M\u00e0u \u0111en": 10,
+ "M\u00e0u \u0111\u1ecf": 12,
+ "M\u00e1ch": 44,
+ "M\u00faa": 54,
+ "M\u1eadp": 86,
+ "M\u1eb7n": 70,
+ "M\u1eb9": 31,
+ "M\u1ec7t": 90,
+ "Ngoan": 87,
+ "Ngon mi\u1ec7ng": 76,
+ "Ng\u1ecdt": 73,
+ "Nh\u1ea1t": 75,
+ "Nh\u1ea7m l\u1eabn": 57,
+ "N\u01b0\u1edbng": 56,
+ "N\u1ea5u": 55,
+ "Quan s\u00e1t": 58,
+ "Qu\u1ea3 b\u01a1": 26,
+ "Qu\u1ea3 cam": 25,
+ "Qu\u1ea3 chu\u1ed1i": 27,
+ "Qu\u1ea3 d\u00e2u": 20,
+ "Qu\u1ea3 d\u1ee9a": 22,
+ "Qu\u1ea3 d\u1eeba": 29,
+ "Qu\u1ea3 m\u1eadn": 21,
+ "Qu\u1ea3 xo\u00e0i": 28,
+ "Qu\u1ea3 \u0111u \u0111\u1ee7": 24,
+ "Qu\u1ea3 \u0111\u00e0o": 23,
+ "R\u1ed9ng": 81,
+ "S\u00e1ng t\u1ea1o": 99,
+ "Th\u00e8m": 43,
+ "Th\u00fa v\u1ecb": 96,
+ "T\u1ed1t b\u1ee5ng": 95,
+ "U\u1ed1ng": 41,
+ "V\u1ee3": 34,
+ "Xem": 42,
+ "X\u1ea5u": 77,
+ "\u00d4ng ngo\u1ea1i": 38,
+ "\u00d4ng n\u1ed9i": 36,
+ "\u0102n": 40,
+ "\u0110au": 91,
+ "\u0110i": 50,
+ "\u0110\u1eadm": 74,
+ "\u0110\u1eafng": 71,
+ "\u0110\u1eb9p": 78,
+ "\u1ed0m": 85
+ },
+ "layer_norm_eps": 1e-12,
+ "model_type": "videomae",
+ "norm_pix_loss": true,
+ "num_attention_heads": 16,
+ "num_channels": 3,
+ "num_frames": 16,
+ "num_hidden_layers": 12,
+ "patch_size": 16,
+ "problem_type": "single_label_classification",
+ "qkv_bias": true,
+ "torch_dtype": "float32",
+ "transformers_version": "4.28.1",
+ "tubelet_size": 2,
+ "use_mean_pooling": true
+ }
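
For orientation, this config.json describes a small VideoMAE video classifier (hidden_size 384, 12 encoder layers, 16-frame clips at image_size 224) with a single-label head over 100 Vietnamese sign-language glosses. The sketch below shows how such a checkpoint is typically loaded with transformers; it is not part of this commit, the checkpoint path is a placeholder (the final Hub id is not stated here), and random frames stand in for a real clip.

```python
# Minimal sketch, assuming a transformers >= 4.28 environment and a placeholder checkpoint path.
import numpy as np
import torch
from transformers import VideoMAEForVideoClassification, VideoMAEImageProcessor

# Placeholder: substitute the actual Hub id or a local clone of this repository.
checkpoint = "videomae-small-finetuned-kinetics-finetuned-vsl-finetuned-skeleton-vsl"

processor = VideoMAEImageProcessor.from_pretrained(checkpoint)
model = VideoMAEForVideoClassification.from_pretrained(checkpoint)

# The config expects num_frames=16 RGB frames; random frames stand in for a real video here.
video = [np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8) for _ in range(16)]
inputs = processor(video, return_tensors="pt")  # pixel_values of shape (1, 16, 3, 224, 224)

with torch.no_grad():
    logits = model(**inputs).logits  # shape (1, 100), one logit per gloss

print(model.config.id2label[int(logits.argmax(-1))])  # one of the 100 Vietnamese glosses
```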
last-checkpoint/config.json ADDED
@@ -0,0 +1,235 @@
+ {
+ "_name_or_path": "VieSignLang/videomae-small-finetuned-kinetics-finetuned-vsl",
+ "architectures": [
+ "VideoMAEForVideoClassification"
+ ],
+ "attention_probs_dropout_prob": 0.0,
+ "decoder_hidden_size": 192,
+ "decoder_intermediate_size": 768,
+ "decoder_num_attention_heads": 3,
+ "decoder_num_hidden_layers": 12,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.0,
+ "hidden_size": 384,
+ "id2label": {
+ "0": "Con ch\u00f3",
+ "1": "Con m\u00e8o",
+ "2": "Con g\u00e0",
+ "3": "Con v\u1ecbt",
+ "4": "Con r\u00f9a",
+ "5": "Con th\u1ecf",
+ "6": "Con tr\u00e2u",
+ "7": "Con b\u00f2",
+ "8": "Con d\u00ea",
+ "9": "Con heo",
+ "10": "M\u00e0u \u0111en",
+ "11": "M\u00e0u tr\u1eafng",
+ "12": "M\u00e0u \u0111\u1ecf",
+ "13": "M\u00e0u cam",
+ "14": "M\u00e0u v\u00e0ng",
+ "15": "M\u00e0u l\u00e1 c\u00e2y",
+ "16": "M\u00e0u da tr\u1eddi",
+ "17": "M\u00e0u h\u1ed3ng",
+ "18": "M\u00e0u t\u00edm",
+ "19": "M\u00e0u n\u00e2u",
+ "20": "Qu\u1ea3 d\u00e2u",
+ "21": "Qu\u1ea3 m\u1eadn",
+ "22": "Qu\u1ea3 d\u1ee9a",
+ "23": "Qu\u1ea3 \u0111\u00e0o",
+ "24": "Qu\u1ea3 \u0111u \u0111\u1ee7",
+ "25": "Qu\u1ea3 cam",
+ "26": "Qu\u1ea3 b\u01a1",
+ "27": "Qu\u1ea3 chu\u1ed1i",
+ "28": "Qu\u1ea3 xo\u00e0i",
+ "29": "Qu\u1ea3 d\u1eeba",
+ "30": "B\u1ed1",
+ "31": "M\u1eb9",
+ "32": "Con trai",
+ "33": "Con g\u00e1i",
+ "34": "V\u1ee3",
+ "35": "Ch\u1ed3ng",
+ "36": "\u00d4ng n\u1ed9i",
+ "37": "B\u00e0 n\u1ed9i",
+ "38": "\u00d4ng ngo\u1ea1i",
+ "39": "B\u00e0 ngo\u1ea1i",
+ "40": "\u0102n",
+ "41": "U\u1ed1ng",
+ "42": "Xem",
+ "43": "Th\u00e8m",
+ "44": "M\u00e1ch",
+ "45": "Kh\u00f3c",
+ "46": "C\u01b0\u1eddi",
+ "47": "H\u1ecdc",
+ "48": "D\u1ed7i",
+ "49": "Ch\u1ebft",
+ "50": "\u0110i",
+ "51": "Ch\u1ea1y",
+ "52": "B\u1eadn",
+ "53": "H\u00e1t",
+ "54": "M\u00faa",
+ "55": "N\u1ea5u",
+ "56": "N\u01b0\u1edbng",
+ "57": "Nh\u1ea7m l\u1eabn",
+ "58": "Quan s\u00e1t",
+ "59": "C\u1eafm tr\u1ea1i",
+ "60": "Cung c\u1ea5p",
+ "61": "B\u1eaft ch\u01b0\u1edbc",
+ "62": "B\u1eaft bu\u1ed9c",
+ "63": "B\u00e1o c\u00e1o",
+ "64": "Mua b\u00e1n",
+ "65": "Kh\u00f4ng quen",
+ "66": "Kh\u00f4ng n\u00ean",
+ "67": "Kh\u00f4ng c\u1ea7n",
+ "68": "Kh\u00f4ng cho",
+ "69": "Kh\u00f4ng nghe l\u1eddi",
+ "70": "M\u1eb7n",
+ "71": "\u0110\u1eafng",
+ "72": "Cay",
+ "73": "Ng\u1ecdt",
+ "74": "\u0110\u1eadm",
+ "75": "Nh\u1ea1t",
+ "76": "Ngon mi\u1ec7ng",
+ "77": "X\u1ea5u",
+ "78": "\u0110\u1eb9p",
+ "79": "Ch\u1eadt",
+ "80": "H\u1eb9p",
+ "81": "R\u1ed9ng",
+ "82": "D\u00e0i",
+ "83": "Cao",
+ "84": "L\u00f9n",
+ "85": "\u1ed0m",
+ "86": "M\u1eadp",
+ "87": "Ngoan",
+ "88": "H\u01b0",
+ "89": "Kh\u1ecfe",
+ "90": "M\u1ec7t",
+ "91": "\u0110au",
+ "92": "Gi\u1ecfi",
+ "93": "Ch\u0103m ch\u1ec9",
+ "94": "L\u01b0\u1eddi bi\u1ebfng",
+ "95": "T\u1ed1t b\u1ee5ng",
+ "96": "Th\u00fa v\u1ecb",
+ "97": "H\u00e0i h\u01b0\u1edbc",
+ "98": "D\u0169ng c\u1ea3m",
+ "99": "S\u00e1ng t\u1ea1o"
+ },
+ "image_size": 224,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "label2id": {
+ "B\u00e0 ngo\u1ea1i": 39,
+ "B\u00e0 n\u1ed9i": 37,
+ "B\u00e1o c\u00e1o": 63,
+ "B\u1eadn": 52,
+ "B\u1eaft bu\u1ed9c": 62,
+ "B\u1eaft ch\u01b0\u1edbc": 61,
+ "B\u1ed1": 30,
+ "Cao": 83,
+ "Cay": 72,
+ "Ch\u0103m ch\u1ec9": 93,
+ "Ch\u1ea1y": 51,
+ "Ch\u1eadt": 79,
+ "Ch\u1ebft": 49,
+ "Ch\u1ed3ng": 35,
+ "Con b\u00f2": 7,
+ "Con ch\u00f3": 0,
+ "Con d\u00ea": 8,
+ "Con g\u00e0": 2,
+ "Con g\u00e1i": 33,
+ "Con heo": 9,
+ "Con m\u00e8o": 1,
+ "Con r\u00f9a": 4,
+ "Con th\u1ecf": 5,
+ "Con trai": 32,
+ "Con tr\u00e2u": 6,
+ "Con v\u1ecbt": 3,
+ "Cung c\u1ea5p": 60,
+ "C\u01b0\u1eddi": 46,
+ "C\u1eafm tr\u1ea1i": 59,
+ "D\u00e0i": 82,
+ "D\u0169ng c\u1ea3m": 98,
+ "D\u1ed7i": 48,
+ "Gi\u1ecfi": 92,
+ "H\u00e0i h\u01b0\u1edbc": 97,
+ "H\u00e1t": 53,
+ "H\u01b0": 88,
+ "H\u1eb9p": 80,
+ "H\u1ecdc": 47,
+ "Kh\u00f3c": 45,
+ "Kh\u00f4ng cho": 68,
+ "Kh\u00f4ng c\u1ea7n": 67,
+ "Kh\u00f4ng nghe l\u1eddi": 69,
+ "Kh\u00f4ng n\u00ean": 66,
+ "Kh\u00f4ng quen": 65,
+ "Kh\u1ecfe": 89,
+ "L\u00f9n": 84,
+ "L\u01b0\u1eddi bi\u1ebfng": 94,
+ "Mua b\u00e1n": 64,
+ "M\u00e0u cam": 13,
+ "M\u00e0u da tr\u1eddi": 16,
+ "M\u00e0u h\u1ed3ng": 17,
+ "M\u00e0u l\u00e1 c\u00e2y": 15,
+ "M\u00e0u n\u00e2u": 19,
+ "M\u00e0u tr\u1eafng": 11,
+ "M\u00e0u t\u00edm": 18,
+ "M\u00e0u v\u00e0ng": 14,
+ "M\u00e0u \u0111en": 10,
+ "M\u00e0u \u0111\u1ecf": 12,
+ "M\u00e1ch": 44,
+ "M\u00faa": 54,
+ "M\u1eadp": 86,
+ "M\u1eb7n": 70,
+ "M\u1eb9": 31,
+ "M\u1ec7t": 90,
+ "Ngoan": 87,
+ "Ngon mi\u1ec7ng": 76,
+ "Ng\u1ecdt": 73,
+ "Nh\u1ea1t": 75,
+ "Nh\u1ea7m l\u1eabn": 57,
+ "N\u01b0\u1edbng": 56,
+ "N\u1ea5u": 55,
+ "Quan s\u00e1t": 58,
+ "Qu\u1ea3 b\u01a1": 26,
+ "Qu\u1ea3 cam": 25,
+ "Qu\u1ea3 chu\u1ed1i": 27,
+ "Qu\u1ea3 d\u00e2u": 20,
+ "Qu\u1ea3 d\u1ee9a": 22,
+ "Qu\u1ea3 d\u1eeba": 29,
+ "Qu\u1ea3 m\u1eadn": 21,
+ "Qu\u1ea3 xo\u00e0i": 28,
+ "Qu\u1ea3 \u0111u \u0111\u1ee7": 24,
+ "Qu\u1ea3 \u0111\u00e0o": 23,
+ "R\u1ed9ng": 81,
+ "S\u00e1ng t\u1ea1o": 99,
+ "Th\u00e8m": 43,
+ "Th\u00fa v\u1ecb": 96,
+ "T\u1ed1t b\u1ee5ng": 95,
+ "U\u1ed1ng": 41,
+ "V\u1ee3": 34,
+ "Xem": 42,
+ "X\u1ea5u": 77,
+ "\u00d4ng ngo\u1ea1i": 38,
+ "\u00d4ng n\u1ed9i": 36,
+ "\u0102n": 40,
+ "\u0110au": 91,
+ "\u0110i": 50,
+ "\u0110\u1eadm": 74,
+ "\u0110\u1eafng": 71,
+ "\u0110\u1eb9p": 78,
+ "\u1ed0m": 85
+ },
+ "layer_norm_eps": 1e-12,
+ "model_type": "videomae",
+ "norm_pix_loss": true,
+ "num_attention_heads": 16,
+ "num_channels": 3,
+ "num_frames": 16,
+ "num_hidden_layers": 12,
+ "patch_size": 16,
+ "problem_type": "single_label_classification",
+ "qkv_bias": true,
+ "torch_dtype": "float32",
+ "transformers_version": "4.28.1",
+ "tubelet_size": 2,
+ "use_mean_pooling": true
+ }
last-checkpoint/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a503d8e99a00733369f764b4df3a863109988356beef87e9ca24e2da49fd0d5
+ size 175457082
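
Note that the .pt and .bin entries in this commit are Git LFS pointer files (the version/oid/size triplet above), not the binary payloads themselves; the actual data, such as the roughly 175 MB optimizer state here and the roughly 88 MB pytorch_model.bin below, is fetched when cloning with git-lfs installed, or afterwards with `git lfs pull`.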
last-checkpoint/preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
+ {
+ "crop_size": {
+ "height": 224,
+ "width": 224
+ },
+ "do_center_crop": true,
+ "do_normalize": true,
+ "do_rescale": true,
+ "do_resize": true,
+ "feature_extractor_type": "VideoMAEFeatureExtractor",
+ "image_mean": [
+ 0.485,
+ 0.456,
+ 0.406
+ ],
+ "image_processor_type": "VideoMAEImageProcessor",
+ "image_std": [
+ 0.229,
+ 0.224,
+ 0.225
+ ],
+ "resample": 2,
+ "rescale_factor": 0.00392156862745098,
+ "size": {
+ "shortest_edge": 224
+ }
+ }
last-checkpoint/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f5328f96b596628fd7651c7b1c7ae29e949cdf9d476be393fc643773901b0704
+ size 87738162
last-checkpoint/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8481b4195c9a6f25658371ced1d7dd5c2657e735abb5356dd36884e2b21cba3f
+ size 14244
last-checkpoint/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:95f4bffc8b0fdfd83c6c96f395187e1a48fffc9b12a1bf401ba380468869892a
+ size 1064
last-checkpoint/trainer_state.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "best_metric": 0.012048192771084338,
+ "best_model_checkpoint": "videomae-small-finetuned-kinetics-finetuned-vsl-finetuned-skeleton-vsl/checkpoint-355",
+ "epoch": 0.05014124293785311,
+ "global_step": 355,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.05,
+ "learning_rate": 2.507062146892655e-05,
+ "loss": 4.6153,
+ "step": 355
+ },
+ {
+ "epoch": 0.05,
+ "eval_accuracy": 0.012048192771084338,
+ "eval_loss": 4.600752353668213,
+ "eval_runtime": 1309.0027,
+ "eval_samples_per_second": 0.507,
+ "eval_steps_per_second": 0.102,
+ "step": 355
+ }
+ ],
+ "max_steps": 7080,
+ "num_train_epochs": 9223372036854775807,
+ "total_flos": 5.6157377918887526e+17,
+ "trial_name": null,
+ "trial_params": null
+ }
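
A quick sanity check on the numbers above: the epoch fraction 0.0501 is global_step divided by 7080 (355 / 7080 ≈ 0.0501), consistent with the "Training in progress, epoch 0" commit message, and num_train_epochs = 9223372036854775807 (2^63 − 1) is how the Trainer typically records a run bounded by max_steps rather than by an epoch count, with one "epoch" counted as max_steps updates. At roughly 5% of the run, the eval accuracy of about 1.2% is barely above the 1% chance level for 100 classes, as expected this early in training.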
last-checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:94f530f42a7f1aa3b38538d20d04a7ad890e64620e42e3d621015ec951e16413
+ size 4216
preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
+ {
+ "crop_size": {
+ "height": 224,
+ "width": 224
+ },
+ "do_center_crop": true,
+ "do_normalize": true,
+ "do_rescale": true,
+ "do_resize": true,
+ "feature_extractor_type": "VideoMAEFeatureExtractor",
+ "image_mean": [
+ 0.485,
+ 0.456,
+ 0.406
+ ],
+ "image_processor_type": "VideoMAEImageProcessor",
+ "image_std": [
+ 0.229,
+ 0.224,
+ 0.225
+ ],
+ "resample": 2,
+ "rescale_factor": 0.00392156862745098,
+ "size": {
+ "shortest_edge": 224
+ }
+ }
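
The preprocessing config amounts to standard ImageNet-style input handling: resize the shortest edge to 224 and center-crop to 224x224 (resample 2 is PIL's bilinear filter), rescale pixel values by 1/255 (0.00392156862745098), then normalize with the ImageNet channel mean and std. A rough numpy sketch of just the rescale-and-normalize step, for illustration only (resizing and cropping omitted):

```python
import numpy as np

# Values as listed in preprocessor_config.json (ImageNet statistics).
IMAGE_MEAN = np.array([0.485, 0.456, 0.406])
IMAGE_STD = np.array([0.229, 0.224, 0.225])
RESCALE_FACTOR = 0.00392156862745098  # == 1/255

def normalize_frame(frame_uint8: np.ndarray) -> np.ndarray:
    """Rescale and normalize one already-resized/cropped HxWx3 uint8 frame."""
    x = frame_uint8.astype(np.float32) * RESCALE_FACTOR
    return (x - IMAGE_MEAN) / IMAGE_STD
```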
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f5328f96b596628fd7651c7b1c7ae29e949cdf9d476be393fc643773901b0704
+ size 87738162
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:94f530f42a7f1aa3b38538d20d04a7ad890e64620e42e3d621015ec951e16413
+ size 4216