videomae_skeleton_v1.0 / config.json
tanthinhdt's picture
Training in progress, epoch 0
bcd239d
{
"_name_or_path": "VieSignLang/videomae-small-finetuned-kinetics-finetuned-vsl-finetuned-skeleton-vsl",
"architectures": [
"VideoMAEForVideoClassification"
],
"attention_probs_dropout_prob": 0.0,
"decoder_hidden_size": 192,
"decoder_intermediate_size": 768,
"decoder_num_attention_heads": 3,
"decoder_num_hidden_layers": 12,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"hidden_size": 384,
"id2label": {
"0": "Con ch\u00f3",
"1": "Con m\u00e8o",
"2": "Con g\u00e0",
"3": "Con v\u1ecbt",
"4": "Con r\u00f9a",
"5": "Con th\u1ecf",
"6": "Con tr\u00e2u",
"7": "Con b\u00f2",
"8": "Con d\u00ea",
"9": "Con heo",
"10": "M\u00e0u \u0111en",
"11": "M\u00e0u tr\u1eafng",
"12": "M\u00e0u \u0111\u1ecf",
"13": "M\u00e0u cam",
"14": "M\u00e0u v\u00e0ng",
"15": "M\u00e0u l\u00e1 c\u00e2y",
"16": "M\u00e0u da tr\u1eddi",
"17": "M\u00e0u h\u1ed3ng",
"18": "M\u00e0u t\u00edm",
"19": "M\u00e0u n\u00e2u",
"20": "Qu\u1ea3 d\u00e2u",
"21": "Qu\u1ea3 m\u1eadn",
"22": "Qu\u1ea3 d\u1ee9a",
"23": "Qu\u1ea3 \u0111\u00e0o",
"24": "Qu\u1ea3 \u0111u \u0111\u1ee7",
"25": "Qu\u1ea3 cam",
"26": "Qu\u1ea3 b\u01a1",
"27": "Qu\u1ea3 chu\u1ed1i",
"28": "Qu\u1ea3 xo\u00e0i",
"29": "Qu\u1ea3 d\u1eeba",
"30": "B\u1ed1",
"31": "M\u1eb9",
"32": "Con trai",
"33": "Con g\u00e1i",
"34": "V\u1ee3",
"35": "Ch\u1ed3ng",
"36": "\u00d4ng n\u1ed9i",
"37": "B\u00e0 n\u1ed9i",
"38": "\u00d4ng ngo\u1ea1i",
"39": "B\u00e0 ngo\u1ea1i",
"40": "\u0102n",
"41": "U\u1ed1ng",
"42": "Xem",
"43": "Th\u00e8m",
"44": "M\u00e1ch",
"45": "Kh\u00f3c",
"46": "C\u01b0\u1eddi",
"47": "H\u1ecdc",
"48": "D\u1ed7i",
"49": "Ch\u1ebft",
"50": "\u0110i",
"51": "Ch\u1ea1y",
"52": "B\u1eadn",
"53": "H\u00e1t",
"54": "M\u00faa",
"55": "N\u1ea5u",
"56": "N\u01b0\u1edbng",
"57": "Nh\u1ea7m l\u1eabn",
"58": "Quan s\u00e1t",
"59": "C\u1eafm tr\u1ea1i",
"60": "Cung c\u1ea5p",
"61": "B\u1eaft ch\u01b0\u1edbc",
"62": "B\u1eaft bu\u1ed9c",
"63": "B\u00e1o c\u00e1o",
"64": "Mua b\u00e1n",
"65": "Kh\u00f4ng quen",
"66": "Kh\u00f4ng n\u00ean",
"67": "Kh\u00f4ng c\u1ea7n",
"68": "Kh\u00f4ng cho",
"69": "Kh\u00f4ng nghe l\u1eddi",
"70": "M\u1eb7n",
"71": "\u0110\u1eafng",
"72": "Cay",
"73": "Ng\u1ecdt",
"74": "\u0110\u1eadm",
"75": "Nh\u1ea1t",
"76": "Ngon mi\u1ec7ng",
"77": "X\u1ea5u",
"78": "\u0110\u1eb9p",
"79": "Ch\u1eadt",
"80": "H\u1eb9p",
"81": "R\u1ed9ng",
"82": "D\u00e0i",
"83": "Cao",
"84": "L\u00f9n",
"85": "\u1ed0m",
"86": "M\u1eadp",
"87": "Ngoan",
"88": "H\u01b0",
"89": "Kh\u1ecfe",
"90": "M\u1ec7t",
"91": "\u0110au",
"92": "Gi\u1ecfi",
"93": "Ch\u0103m ch\u1ec9",
"94": "L\u01b0\u1eddi bi\u1ebfng",
"95": "T\u1ed1t b\u1ee5ng",
"96": "Th\u00fa v\u1ecb",
"97": "H\u00e0i h\u01b0\u1edbc",
"98": "D\u0169ng c\u1ea3m",
"99": "S\u00e1ng t\u1ea1o"
},
"image_size": 224,
"initializer_range": 0.02,
"intermediate_size": 1536,
"label2id": {
"B\u00e0 ngo\u1ea1i": 39,
"B\u00e0 n\u1ed9i": 37,
"B\u00e1o c\u00e1o": 63,
"B\u1eadn": 52,
"B\u1eaft bu\u1ed9c": 62,
"B\u1eaft ch\u01b0\u1edbc": 61,
"B\u1ed1": 30,
"Cao": 83,
"Cay": 72,
"Ch\u0103m ch\u1ec9": 93,
"Ch\u1ea1y": 51,
"Ch\u1eadt": 79,
"Ch\u1ebft": 49,
"Ch\u1ed3ng": 35,
"Con b\u00f2": 7,
"Con ch\u00f3": 0,
"Con d\u00ea": 8,
"Con g\u00e0": 2,
"Con g\u00e1i": 33,
"Con heo": 9,
"Con m\u00e8o": 1,
"Con r\u00f9a": 4,
"Con th\u1ecf": 5,
"Con trai": 32,
"Con tr\u00e2u": 6,
"Con v\u1ecbt": 3,
"Cung c\u1ea5p": 60,
"C\u01b0\u1eddi": 46,
"C\u1eafm tr\u1ea1i": 59,
"D\u00e0i": 82,
"D\u0169ng c\u1ea3m": 98,
"D\u1ed7i": 48,
"Gi\u1ecfi": 92,
"H\u00e0i h\u01b0\u1edbc": 97,
"H\u00e1t": 53,
"H\u01b0": 88,
"H\u1eb9p": 80,
"H\u1ecdc": 47,
"Kh\u00f3c": 45,
"Kh\u00f4ng cho": 68,
"Kh\u00f4ng c\u1ea7n": 67,
"Kh\u00f4ng nghe l\u1eddi": 69,
"Kh\u00f4ng n\u00ean": 66,
"Kh\u00f4ng quen": 65,
"Kh\u1ecfe": 89,
"L\u00f9n": 84,
"L\u01b0\u1eddi bi\u1ebfng": 94,
"Mua b\u00e1n": 64,
"M\u00e0u cam": 13,
"M\u00e0u da tr\u1eddi": 16,
"M\u00e0u h\u1ed3ng": 17,
"M\u00e0u l\u00e1 c\u00e2y": 15,
"M\u00e0u n\u00e2u": 19,
"M\u00e0u tr\u1eafng": 11,
"M\u00e0u t\u00edm": 18,
"M\u00e0u v\u00e0ng": 14,
"M\u00e0u \u0111en": 10,
"M\u00e0u \u0111\u1ecf": 12,
"M\u00e1ch": 44,
"M\u00faa": 54,
"M\u1eadp": 86,
"M\u1eb7n": 70,
"M\u1eb9": 31,
"M\u1ec7t": 90,
"Ngoan": 87,
"Ngon mi\u1ec7ng": 76,
"Ng\u1ecdt": 73,
"Nh\u1ea1t": 75,
"Nh\u1ea7m l\u1eabn": 57,
"N\u01b0\u1edbng": 56,
"N\u1ea5u": 55,
"Quan s\u00e1t": 58,
"Qu\u1ea3 b\u01a1": 26,
"Qu\u1ea3 cam": 25,
"Qu\u1ea3 chu\u1ed1i": 27,
"Qu\u1ea3 d\u00e2u": 20,
"Qu\u1ea3 d\u1ee9a": 22,
"Qu\u1ea3 d\u1eeba": 29,
"Qu\u1ea3 m\u1eadn": 21,
"Qu\u1ea3 xo\u00e0i": 28,
"Qu\u1ea3 \u0111u \u0111\u1ee7": 24,
"Qu\u1ea3 \u0111\u00e0o": 23,
"R\u1ed9ng": 81,
"S\u00e1ng t\u1ea1o": 99,
"Th\u00e8m": 43,
"Th\u00fa v\u1ecb": 96,
"T\u1ed1t b\u1ee5ng": 95,
"U\u1ed1ng": 41,
"V\u1ee3": 34,
"Xem": 42,
"X\u1ea5u": 77,
"\u00d4ng ngo\u1ea1i": 38,
"\u00d4ng n\u1ed9i": 36,
"\u0102n": 40,
"\u0110au": 91,
"\u0110i": 50,
"\u0110\u1eadm": 74,
"\u0110\u1eafng": 71,
"\u0110\u1eb9p": 78,
"\u1ed0m": 85
},
"layer_norm_eps": 1e-12,
"model_type": "videomae",
"norm_pix_loss": true,
"num_attention_heads": 16,
"num_channels": 3,
"num_frames": 16,
"num_hidden_layers": 12,
"patch_size": 16,
"problem_type": "single_label_classification",
"qkv_bias": true,
"torch_dtype": "float32",
"transformers_version": "4.28.1",
"tubelet_size": 2,
"use_mean_pooling": true
}