marofmar
committed on
Commit
•
38881ff
1
Parent(s):
374d523
First version of the your-model-name model and tokenizer.
Browse files
- config.json +76 -0
- preprocessor_config.json +9 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +1 -0
config.json
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "facebook/wav2vec2-large-xlsr-53",
|
3 |
+
"activation_dropout": 0.0,
|
4 |
+
"apply_spec_augment": true,
|
5 |
+
"architectures": [
|
6 |
+
"Wav2Vec2ForCTC"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"conv_bias": true,
|
11 |
+
"conv_dim": [
|
12 |
+
512,
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512
|
19 |
+
],
|
20 |
+
"conv_kernel": [
|
21 |
+
10,
|
22 |
+
3,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
2,
|
27 |
+
2
|
28 |
+
],
|
29 |
+
"conv_stride": [
|
30 |
+
5,
|
31 |
+
2,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2
|
37 |
+
],
|
38 |
+
"ctc_loss_reduction": "mean",
|
39 |
+
"ctc_zero_infinity": false,
|
40 |
+
"do_stable_layer_norm": true,
|
41 |
+
"eos_token_id": 2,
|
42 |
+
"feat_extract_activation": "gelu",
|
43 |
+
"feat_extract_dropout": 0.0,
|
44 |
+
"feat_extract_norm": "layer",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"final_dropout": 0.0,
|
47 |
+
"gradient_checkpointing": true,
|
48 |
+
"hidden_act": "gelu",
|
49 |
+
"hidden_dropout": 0.1,
|
50 |
+
"hidden_size": 1024,
|
51 |
+
"initializer_range": 0.02,
|
52 |
+
"intermediate_size": 4096,
|
53 |
+
"layer_norm_eps": 1e-05,
|
54 |
+
"layerdrop": 0.1,
|
55 |
+
"mask_channel_length": 10,
|
56 |
+
"mask_channel_min_space": 1,
|
57 |
+
"mask_channel_other": 0.0,
|
58 |
+
"mask_channel_prob": 0.0,
|
59 |
+
"mask_channel_selection": "static",
|
60 |
+
"mask_feature_length": 10,
|
61 |
+
"mask_feature_prob": 0.0,
|
62 |
+
"mask_time_length": 10,
|
63 |
+
"mask_time_min_space": 1,
|
64 |
+
"mask_time_other": 0.0,
|
65 |
+
"mask_time_prob": 0.05,
|
66 |
+
"mask_time_selection": "static",
|
67 |
+
"model_type": "wav2vec2",
|
68 |
+
"num_attention_heads": 16,
|
69 |
+
"num_conv_pos_embedding_groups": 16,
|
70 |
+
"num_conv_pos_embeddings": 128,
|
71 |
+
"num_feat_extract_layers": 7,
|
72 |
+
"num_hidden_layers": 24,
|
73 |
+
"pad_token_id": 259,
|
74 |
+
"transformers_version": "4.6.0.dev0",
|
75 |
+
"vocab_size": 260
|
76 |
+
}
|
preprocessor_config.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": true,
|
3 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
4 |
+
"feature_size": 1,
|
5 |
+
"padding_side": "right",
|
6 |
+
"padding_value": 0.0,
|
7 |
+
"return_attention_mask": true,
|
8 |
+
"sampling_rate": 16000
|
9 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63ce341388d47769a2497a4f39f7008598ffbb18bc28e1230e82e34a56f8f384
|
3 |
+
size 1262999831
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|"}
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"์ผ": 0, "์ผ": 1, "๋ถ": 2, "์": 3, "๋": 4, "๋ฌ": 5, "๋": 6, "๋ณธ": 7, "์ ": 8, "์": 9, "์ด": 10, "๋": 11, "๋": 12, "๊ฒจ": 13, "๊ทผ": 14, "์ค": 15, "์ฑ": 16, "๋": 17, "๋ฅ": 18, "๋ค": 19, "์ฐ": 20, "์": 21, "์ ": 22, "๋ด": 23, "๊ณผ": 24, "์": 25, "๋น": 26, "ํ": 27, "์": 28, "์": 29, "๊ฒฝ": 30, "์
": 31, "ํ": 32, "๊ฐ": 33, "1": 34, "๊ฐ": 35, "๋ญ": 36, "์": 37, "์น": 38, "๋ฐ": 39, "์ง": 40, "์ฐฝ": 41, "๊ฒ": 42, "ํ": 43, "๋ค": 44, "ํ": 45, "๋ฅด": 46, "๋ฌด": 47, "๊ธ": 48, "๊ธ": 49, "ํ": 50, "๊ฒ": 51, "ํ": 52, "์ฅ": 53, "๋ ": 54, "๋ฒ": 55, "์": 56, "๋": 57, "๊ฒ ": 58, "๊ทธ": 59, "์": 60, "๊ต": 61, "ํ
": 62, "์": 63, "๋ง": 64, "์จ": 65, "์": 66, "๋ผ": 67, "๋ก ": 68, "๋ฆฌ": 69, "๋ฉฐ": 70, "์ฉ": 71, "์": 72, "๋ฌ": 73, "๊ตฌ": 74, "๋": 75, "์ฌ": 76, "์ถ": 77, "๊ฐ": 78, "๋ธ": 79, "๋ฆ": 80, "๊ฐ": 81, "์ซ": 82, ".": 83, "๋ฏ": 84, "์": 85, "๊ฐ": 86, "๋ง": 87, "๋ฃ": 88, "๋น": 89, "์": 90, "๋ป": 91, "์ด": 92, "๊ฒฉ": 93, "์": 94, "์ฒ": 95, "๋ก": 96, "๋ง": 97, "์": 98, "๋": 99, "ํธ": 100, "๋ฌผ": 101, "ํ": 102, "์ง": 103, "ํฅ": 104, "๋ง": 105, "์": 106, "๊ฐ": 107, "์ฌ": 108, "๋ ค": 109, "๋ฅผ": 110, "๊ฐ": 111, "๋ฐ": 112, "๋ฉด": 113, "์ญ": 114, "์ด": 115, "๊ฐ": 116, "๋ค": 117, "ํน": 118, "๋ฐ": 119, "ํฌ": 120, "๊ธธ": 121, "ํ": 122, "๋ ": 123, "๋ฅ": 124, "์ง": 125, "์ง": 126, "๋": 127, "๋จ": 128, "ํฌ": 129, "์ต": 130, "ํ": 131, "๋": 132, "ํด": 133, "๋ฐฉ": 134, "์": 135, "์ข": 136, "๋": 137, "์ต": 138, "๊ตญ": 139, "ํ ": 140, "๋ฆฐ": 141, "๋ก": 142, "์ปด": 143, "์ต": 144, "๋ ": 145, "๋ง": 146, "์": 147, "์ผ": 148, "์ค": 149, "ํ": 150, "์ฃ ": 151, "๋ฐ": 152, "๋": 153, "๋ค": 154, "์": 155, "์": 156, "์ ธ": 157, "์": 158, "๊น": 159, "๋ญ": 160, "์ฐ": 161, "์ ": 162, "์ ": 163, "๋ฐ": 164, "ํ": 165, "๊ฑธ": 166, "์ข": 167, "๊ถ": 168, "๋": 170, "์ง": 171, "์ธ": 172, "๊ฟ": 173, "๋ฐ": 174, "์": 175, "์ค": 176, "ํ": 177, "๋ง": 178, "ํ": 179, "๋ชจ": 180, "ํ": 181, "๋": 182, "๋": 183, "์นด": 184, "์": 185, "์น": 186, "๋ฆผ": 187, "์ด": 188, "๋ฏธ": 189, "๋": 190, "์ฐจ": 191, "๋": 192, "๊ฒ": 193, "์ด": 194, "์ก": 195, "์ฐ": 196, "๋": 197, "์จ": 198, "ํญ": 199, "๊ณ ": 200, "์ธ": 201, "๋ณด": 202, "์": 203, "์ถฉ": 204, "์": 205, "๊ฑฐ": 206, "๋": 207, "๋ญ": 208, "๋ฝ": 209, "๋": 210, "๋ง": 211, "๊ธฐ": 212, "์ฌ": 213, "ํ": 214, "์": 215, "์ผ": 216, "๋": 217, "๋": 218, "๋": 219, "์ฃผ": 220, "์ก": 221, "ํ": 222, "๊ฑด": 223, "๋": 224, "์ค": 225, "๋ฅธ": 226, "์ ": 227, "๊ฒฐ": 228, "์ ": 229, "๋": 230, "๋ชป": 231, "๋ถ": 232, "์": 233, "๋": 234, "์จ": 235, "์": 236, "์ ": 237, "๊พธ": 238, "๋ผ": 239, "๋ผ": 240, "์ผ": 241, "๋ถ": 242, "์ถ": 243, "๋ฒ": 244, "์ฝ": 245, "๋": 246, "์
": 247, "๋น": 248, "๋ฐฐ": 249, "๋ฌธ": 250, "ํ": 251, "๋": 252, "์ ": 253, "ํ": 254, "์": 255, "๋
": 256, "์ผ": 257, "|": 169, "[UNK]": 258, "[PAD]": 259}
|