{ "_name_or_path": "VieSignLang/videomae-small-finetuned-kinetics-finetuned-vsl-finetuned-skeleton-vsl", "architectures": [ "VideoMAEForVideoClassification" ], "attention_probs_dropout_prob": 0.0, "decoder_hidden_size": 192, "decoder_intermediate_size": 768, "decoder_num_attention_heads": 3, "decoder_num_hidden_layers": 12, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 384, "id2label": { "0": "Con ch\u00f3", "1": "Con m\u00e8o", "2": "Con g\u00e0", "3": "Con v\u1ecbt", "4": "Con r\u00f9a", "5": "Con th\u1ecf", "6": "Con tr\u00e2u", "7": "Con b\u00f2", "8": "Con d\u00ea", "9": "Con heo", "10": "M\u00e0u \u0111en", "11": "M\u00e0u tr\u1eafng", "12": "M\u00e0u \u0111\u1ecf", "13": "M\u00e0u cam", "14": "M\u00e0u v\u00e0ng", "15": "M\u00e0u l\u00e1 c\u00e2y", "16": "M\u00e0u da tr\u1eddi", "17": "M\u00e0u h\u1ed3ng", "18": "M\u00e0u t\u00edm", "19": "M\u00e0u n\u00e2u", "20": "Qu\u1ea3 d\u00e2u", "21": "Qu\u1ea3 m\u1eadn", "22": "Qu\u1ea3 d\u1ee9a", "23": "Qu\u1ea3 \u0111\u00e0o", "24": "Qu\u1ea3 \u0111u \u0111\u1ee7", "25": "Qu\u1ea3 cam", "26": "Qu\u1ea3 b\u01a1", "27": "Qu\u1ea3 chu\u1ed1i", "28": "Qu\u1ea3 xo\u00e0i", "29": "Qu\u1ea3 d\u1eeba", "30": "B\u1ed1", "31": "M\u1eb9", "32": "Con trai", "33": "Con g\u00e1i", "34": "V\u1ee3", "35": "Ch\u1ed3ng", "36": "\u00d4ng n\u1ed9i", "37": "B\u00e0 n\u1ed9i", "38": "\u00d4ng ngo\u1ea1i", "39": "B\u00e0 ngo\u1ea1i", "40": "\u0102n", "41": "U\u1ed1ng", "42": "Xem", "43": "Th\u00e8m", "44": "M\u00e1ch", "45": "Kh\u00f3c", "46": "C\u01b0\u1eddi", "47": "H\u1ecdc", "48": "D\u1ed7i", "49": "Ch\u1ebft", "50": "\u0110i", "51": "Ch\u1ea1y", "52": "B\u1eadn", "53": "H\u00e1t", "54": "M\u00faa", "55": "N\u1ea5u", "56": "N\u01b0\u1edbng", "57": "Nh\u1ea7m l\u1eabn", "58": "Quan s\u00e1t", "59": "C\u1eafm tr\u1ea1i", "60": "Cung c\u1ea5p", "61": "B\u1eaft ch\u01b0\u1edbc", "62": "B\u1eaft bu\u1ed9c", "63": "B\u00e1o c\u00e1o", "64": "Mua b\u00e1n", "65": "Kh\u00f4ng quen", "66": "Kh\u00f4ng n\u00ean",
"67": "Kh\u00f4ng c\u1ea7n", "68": "Kh\u00f4ng cho", "69": "Kh\u00f4ng nghe l\u1eddi", "70": "M\u1eb7n", "71": "\u0110\u1eafng", "72": "Cay", "73": "Ng\u1ecdt", "74": "\u0110\u1eadm", "75": "Nh\u1ea1t", "76": "Ngon mi\u1ec7ng", "77": "X\u1ea5u", "78": "\u0110\u1eb9p", "79": "Ch\u1eadt", "80": "H\u1eb9p", "81": "R\u1ed9ng", "82": "D\u00e0i", "83": "Cao", "84": "L\u00f9n", "85": "\u1ed0m", "86": "M\u1eadp", "87": "Ngoan", "88": "H\u01b0", "89": "Kh\u1ecfe", "90": "M\u1ec7t", "91": "\u0110au", "92": "Gi\u1ecfi", "93": "Ch\u0103m ch\u1ec9", "94": "L\u01b0\u1eddi bi\u1ebfng", "95": "T\u1ed1t b\u1ee5ng", "96": "Th\u00fa v\u1ecb", "97": "H\u00e0i h\u01b0\u1edbc", "98": "D\u0169ng c\u1ea3m", "99": "S\u00e1ng t\u1ea1o" }, "image_size": 224, "initializer_range": 0.02, "intermediate_size": 1536, "label2id": { "B\u00e0 ngo\u1ea1i": 39, "B\u00e0 n\u1ed9i": 37, "B\u00e1o c\u00e1o": 63, "B\u1eadn": 52, "B\u1eaft bu\u1ed9c": 62, "B\u1eaft ch\u01b0\u1edbc": 61, "B\u1ed1": 30, "Cao": 83, "Cay": 72, "Ch\u0103m ch\u1ec9": 93, "Ch\u1ea1y": 51, "Ch\u1eadt": 79, "Ch\u1ebft": 49, "Ch\u1ed3ng": 35, "Con b\u00f2": 7, "Con ch\u00f3": 0, "Con d\u00ea": 8, "Con g\u00e0": 2, "Con g\u00e1i": 33, "Con heo": 9, "Con m\u00e8o": 1, "Con r\u00f9a": 4, "Con th\u1ecf": 5, "Con trai": 32, "Con tr\u00e2u": 6, "Con v\u1ecbt": 3, "Cung c\u1ea5p": 60, "C\u01b0\u1eddi": 46, "C\u1eafm tr\u1ea1i": 59, "D\u00e0i": 82, "D\u0169ng c\u1ea3m": 98, "D\u1ed7i": 48, "Gi\u1ecfi": 92, "H\u00e0i h\u01b0\u1edbc": 97, "H\u00e1t": 53, "H\u01b0": 88, "H\u1eb9p": 80, "H\u1ecdc": 47, "Kh\u00f3c": 45, "Kh\u00f4ng cho": 68, "Kh\u00f4ng c\u1ea7n": 67, "Kh\u00f4ng nghe l\u1eddi": 69, "Kh\u00f4ng n\u00ean": 66, "Kh\u00f4ng quen": 65, "Kh\u1ecfe": 89, "L\u00f9n": 84, "L\u01b0\u1eddi bi\u1ebfng": 94, "Mua b\u00e1n": 64, "M\u00e0u cam": 13, "M\u00e0u da tr\u1eddi": 16, "M\u00e0u h\u1ed3ng": 17, "M\u00e0u l\u00e1 c\u00e2y": 15, "M\u00e0u n\u00e2u": 19, "M\u00e0u tr\u1eafng": 11, "M\u00e0u t\u00edm": 18, "M\u00e0u v\u00e0ng": 14,
"M\u00e0u \u0111en": 10, "M\u00e0u \u0111\u1ecf": 12, "M\u00e1ch": 44, "M\u00faa": 54, "M\u1eadp": 86, "M\u1eb7n": 70, "M\u1eb9": 31, "M\u1ec7t": 90, "Ngoan": 87, "Ngon mi\u1ec7ng": 76, "Ng\u1ecdt": 73, "Nh\u1ea1t": 75, "Nh\u1ea7m l\u1eabn": 57, "N\u01b0\u1edbng": 56, "N\u1ea5u": 55, "Quan s\u00e1t": 58, "Qu\u1ea3 b\u01a1": 26, "Qu\u1ea3 cam": 25, "Qu\u1ea3 chu\u1ed1i": 27, "Qu\u1ea3 d\u00e2u": 20, "Qu\u1ea3 d\u1ee9a": 22, "Qu\u1ea3 d\u1eeba": 29, "Qu\u1ea3 m\u1eadn": 21, "Qu\u1ea3 xo\u00e0i": 28, "Qu\u1ea3 \u0111u \u0111\u1ee7": 24, "Qu\u1ea3 \u0111\u00e0o": 23, "R\u1ed9ng": 81, "S\u00e1ng t\u1ea1o": 99, "Th\u00e8m": 43, "Th\u00fa v\u1ecb": 96, "T\u1ed1t b\u1ee5ng": 95, "U\u1ed1ng": 41, "V\u1ee3": 34, "Xem": 42, "X\u1ea5u": 77, "\u00d4ng ngo\u1ea1i": 38, "\u00d4ng n\u1ed9i": 36, "\u0102n": 40, "\u0110au": 91, "\u0110i": 50, "\u0110\u1eadm": 74, "\u0110\u1eafng": 71, "\u0110\u1eb9p": 78, "\u1ed0m": 85 }, "layer_norm_eps": 1e-12, "model_type": "videomae", "norm_pix_loss": true, "num_attention_heads": 16, "num_channels": 3, "num_frames": 16, "num_hidden_layers": 12, "patch_size": 16, "problem_type": "single_label_classification", "qkv_bias": true, "torch_dtype": "float32", "transformers_version": "4.28.1", "tubelet_size": 2, "use_mean_pooling": true }