marofmar
committed on
Commit
•
759ccfb
1
Parent(s):
e15dc83
Update from yjchung
Browse files
- added_tokens.json +1 -0
- config.json +76 -0
- optimizer.pt +3 -0
- preprocessor_config.json +9 -0
- pytorch_model.bin +3 -0
- scheduler.pt +3 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- trainer_state.json +44 -0
- training_args.bin +3 -0
- vocab.json +1 -0
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
1 |
+
{"<s>": 652, "</s>": 653}
|
config.json
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "./xlsr-demo-kor-char/checkpoint-800",
|
3 |
+
"activation_dropout": 0.0,
|
4 |
+
"apply_spec_augment": true,
|
5 |
+
"architectures": [
|
6 |
+
"Wav2Vec2ForCTC"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"conv_bias": true,
|
11 |
+
"conv_dim": [
|
12 |
+
512,
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512
|
19 |
+
],
|
20 |
+
"conv_kernel": [
|
21 |
+
10,
|
22 |
+
3,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
2,
|
27 |
+
2
|
28 |
+
],
|
29 |
+
"conv_stride": [
|
30 |
+
5,
|
31 |
+
2,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2
|
37 |
+
],
|
38 |
+
"ctc_loss_reduction": "mean",
|
39 |
+
"ctc_zero_infinity": false,
|
40 |
+
"do_stable_layer_norm": true,
|
41 |
+
"eos_token_id": 2,
|
42 |
+
"feat_extract_activation": "gelu",
|
43 |
+
"feat_extract_dropout": 0.0,
|
44 |
+
"feat_extract_norm": "layer",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"final_dropout": 0.0,
|
47 |
+
"gradient_checkpointing": true,
|
48 |
+
"hidden_act": "gelu",
|
49 |
+
"hidden_dropout": 0.1,
|
50 |
+
"hidden_size": 1024,
|
51 |
+
"initializer_range": 0.02,
|
52 |
+
"intermediate_size": 4096,
|
53 |
+
"layer_norm_eps": 1e-05,
|
54 |
+
"layerdrop": 0.1,
|
55 |
+
"mask_channel_length": 10,
|
56 |
+
"mask_channel_min_space": 1,
|
57 |
+
"mask_channel_other": 0.0,
|
58 |
+
"mask_channel_prob": 0.0,
|
59 |
+
"mask_channel_selection": "static",
|
60 |
+
"mask_feature_length": 10,
|
61 |
+
"mask_feature_prob": 0.0,
|
62 |
+
"mask_time_length": 10,
|
63 |
+
"mask_time_min_space": 1,
|
64 |
+
"mask_time_other": 0.0,
|
65 |
+
"mask_time_prob": 0.05,
|
66 |
+
"mask_time_selection": "static",
|
67 |
+
"model_type": "wav2vec2",
|
68 |
+
"num_attention_heads": 16,
|
69 |
+
"num_conv_pos_embedding_groups": 16,
|
70 |
+
"num_conv_pos_embeddings": 128,
|
71 |
+
"num_feat_extract_layers": 7,
|
72 |
+
"num_hidden_layers": 24,
|
73 |
+
"pad_token_id": 651,
|
74 |
+
"transformers_version": "4.6.0.dev0",
|
75 |
+
"vocab_size": 652
|
76 |
+
}
|
optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b111d19759202404701a9719471580fe09ffde46eb4a7df8154ef3682a214fd
|
3 |
+
size 2495423495
|
preprocessor_config.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": true,
|
3 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
4 |
+
"feature_size": 1,
|
5 |
+
"padding_side": "right",
|
6 |
+
"padding_value": 0.0,
|
7 |
+
"return_attention_mask": true,
|
8 |
+
"sampling_rate": 16000
|
9 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d386b82ba46e2f57dadee92195ebba140380f170201769294d049210d1bfd73
|
3 |
+
size 1264606999
|
scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5e996905c841773d9498c1840a3f51a29609bd92fb24627f9a732cb43ba8c9e
|
3 |
+
size 623
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": "./xlsr-demo-2/checkpoint-800/special_tokens_map.json", "tokenizer_file": null, "name_or_path": "./xlsr-demo-2/checkpoint-800"}
|
trainer_state.json
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 1.391272783279419,
|
3 |
+
"best_model_checkpoint": "./xlsr-demo-kor-char/checkpoint-400",
|
4 |
+
"epoch": 5.440677966101695,
|
5 |
+
"global_step": 800,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 2.72,
|
12 |
+
"learning_rate": 0.0,
|
13 |
+
"loss": 0.0243,
|
14 |
+
"step": 400
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 2.72,
|
18 |
+
"eval_cer": 0.24245381666085744,
|
19 |
+
"eval_loss": 1.391272783279419,
|
20 |
+
"eval_runtime": 23.3049,
|
21 |
+
"eval_samples_per_second": 11.114,
|
22 |
+
"step": 400
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"epoch": 5.44,
|
26 |
+
"learning_rate": 0.0,
|
27 |
+
"loss": 0.0236,
|
28 |
+
"step": 800
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"epoch": 5.44,
|
32 |
+
"eval_cer": 0.24245381666085744,
|
33 |
+
"eval_loss": 1.391272783279419,
|
34 |
+
"eval_runtime": 23.1515,
|
35 |
+
"eval_samples_per_second": 11.187,
|
36 |
+
"step": 800
|
37 |
+
}
|
38 |
+
],
|
39 |
+
"max_steps": 4410,
|
40 |
+
"num_train_epochs": 30,
|
41 |
+
"total_flos": 7.801861030713656e+18,
|
42 |
+
"trial_name": null,
|
43 |
+
"trial_params": null
|
44 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:932a47acc7e041e8d61a79f7e34e572c2b1698ce712364fb61259a4765c23d7d
|
3 |
+
size 2351
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
1 |
+
{"์": 0, "ํ": 1, "์ฐฉ": 2, "๋ฌ": 3, "๋ผ": 4, "๋ฑ
": 5, "๋ก": 6, "์ผ": 7, "์": 8, "์น ": 9, "์งง": 10, "๊ณผ": 11, "์": 12, "์ฌ": 13, "์
": 14, "๊ฒผ": 15, "์ ": 16, "๋งค": 17, "์ญ": 18, "๋ฉฐ": 19, "์จ": 20, "๋ฐธ": 21, "๋ช": 22, "์ฝ": 23, "์": 24, "๋": 25, "๋ฐ": 26, "์ปจ": 27, "๊น": 28, "๊บผ": 29, "ํผ": 30, "๋ฃ": 31, "์ง": 32, "๋ง": 33, "๋ณ": 34, "์ธ ": 35, "์ฒ": 36, "์ ": 37, "๋ฐ": 38, "์": 39, "์": 40, "ํน": 41, "๋ ": 42, "๋ค": 43, "๋ฌธ": 44, "๋ธ": 45, "๋": 46, "์ฏ": 47, "์": 48, "์ ": 49, "์": 50, "์ธ": 51, "๊ฑธ": 52, "๋ฌผ": 53, "๋ฎ": 54, "ํ": 55, "๋ฆ
": 56, "ํผ": 57, "๋": 58, "์ธ": 59, "ํ": 60, "์ฑ": 61, "์ต": 62, "์ธ": 63, "์": 64, "ํผ": 65, "์": 66, "๋ด": 67, "๋
": 68, "์ ธ": 69, "๋
": 70, "๋ง": 71, "๊ฐ": 72, "์ฉ": 73, "์": 74, "๋ง": 75, "๋ จ": 76, "์ป": 77, "๋ค": 78, "๋": 79, "์ถ": 80, "์": 81, "์ค": 82, "์": 83, "๋": 84, "๋ฆด": 85, "๋ฐ": 86, "์": 87, "๋ผ": 88, "๋": 89, "์ฌ": 90, "ํฉ": 91, "๋ง": 92, "๋ค": 93, "๋ฌด": 94, "์": 95, "๊ฐ": 96, "์ผ": 97, "๊ฐ": 98, "๋ถ": 99, "๋": 100, "์ด": 101, "๊บพ": 102, "์ธ": 103, "๋ฑ": 104, "ํ": 105, "๋ฃ": 106, "๋": 107, "๊ฐ": 108, "๊ธฐ": 109, "์ฑ": 110, "๋": 111, "ํ": 112, "์ฐฌ": 113, "์": 114, "์ฑ": 115, "๋ชจ": 116, "๋ฉ": 117, "๋ฆ": 118, "์ฐฝ": 119, "์ถฐ": 120, "์ถฉ": 121, "๋ฌ": 122, "์ฒ": 123, "์ ": 124, "์ฒจ": 125, "์ค": 126, "์ผ": 127, "์ธ": 128, "์ค": 129, "๋": 130, "์จ": 131, "์": 132, "๋ฐ": 133, "์ฌ": 134, "๋ฆฝ": 135, "์จฐ": 136, "๊ตฌ": 137, "๊ตญ": 138, "๊ฑด": 139, "ํด": 140, "์": 141, "์ผ": 142, "๋": 143, "๋": 144, "๋": 145, "์ฑ": 146, "๋": 147, "์": 148, "๋จน": 149, "์ค": 150, "๋": 151, "๋ฒ ": 152, "์ซ": 153, "๋ฒ": 154, "๋": 155, "๋ฉ": 156, "์ผ": 157, "๋": 158, "๋ต": 159, "๋ชฐ": 160, "๊ฒ": 161, "ํ": 162, "ํจ": 163, "์ฐ": 164, "๊ฑฐ": 165, "๋ด": 166, "ํค": 167, "๋ท": 168, "๊ป": 169, "์": 170, "๊ฐ": 171, "ํธ": 172, "ํ": 173, "๋": 174, "๋จ": 175, "ํ": 176, "ํ": 177, "ํ": 178, "๊ธด": 179, "์ธ": 180, "๊ทผ": 181, "ํ": 182, "๋ก ": 183, "๋": 184, "๋ ": 185, "ํฐ": 186, "์": 187, "๋
ธ": 188, "๋ค": 189, "์": 190, "ํด": 191, "์ค": 192, "๋ฉด": 193, "ํจ": 194, "์ง": 195, "๋ญ": 196, "์ก": 197, "์ฃผ": 198, "๋": 199, "๋": 200, "๊ผญ": 201, "๋จธ": 202, "์
": 203, "๋ณต": 204, "๊ณค": 205, "๊ฐ": 206, "ํฌ": 207, "์": 208, "์ผ": 209, "์": 210, "์": 211, "์ชฝ": 212, "์ฝ": 213, "์": 214, "์": 215, "ํ": 216, "์ฆ": 217, "์ฌ": 218, "์ ": 219, "๋ณ": 220, "์ฃ ": 221, "๋งฅ": 222, "๋ถ": 223, "๋ฅ": 224, "์ญ": 225, "์ท": 226, "๊ตณ": 227, "๋ฐฐ": 228, "์
": 229, "์ฝ": 230, "๋จ": 231, "๋ฐ": 232, "ํ": 233, "ํ
": 234, "๋ธ": 235, "๋
น": 236, "๋ต": 237, "๋ ธ": 238, "๊ฒฉ": 240, "๋": 241, "๋ฐฉ": 242, "๊ฐ": 243, "๋": 244, "์": 245, "์": 246, "์ฐธ": 247, "๊ณง": 248, "์น": 249, "์ฝ": 250, "๊ณณ": 251, "ํน": 252, "์ง": 253, "์": 254, "์ฌ": 255, "ํ": 256, "๊ฐ": 257, "๋ก": 258, "๋ด": 259, "์ ": 260, "ํ": 261, "ํ": 262, "์ฐ": 263, "ํด": 264, "์คฌ": 265, "๋ฅด": 266, "๋": 267, "ํ": 268, "๋ ": 269, "๋": 270, "์ฒด": 271, "๋ ": 272, "์": 273, "ํ": 274, "๋ง": 275, "์งค": 276, "ํ": 277, "๋": 278, "๋ถ": 279, "์นด": 280, "๋ฟ": 281, "ํ": 282, "์ฌ": 283, "๋ถ": 284, "ํ": 285, "๋": 286, "๋ฐฑ": 287, "์ฌ": 288, "๊ณต": 289, "๋ ค": 290, "๋ฆฌ": 291, "์": 292, "๋ด": 293, "ํ": 294, "๋ ": 295, "์ค": 296, "ํ": 297, "๋": 298, "์ฐฎ": 299, "๋ฒ": 300, "๋ป": 301, "๊นจ": 302, "์นญ": 303, "๋ฏฟ": 304, "๊ทธ": 305, "๋ฐ": 306, "๋ญ": 307, "๋": 308, "์บ": 309, "๋ฅ ": 310, "ํฟ": 311, "๋ต": 312, "์ฐ": 313, "๋ฅ": 314, "์": 315, "ํ": 316, "์ผ": 317, "์ผ": 318, "๋ถ": 319, "์ถ": 320, "์ฃ": 321, "๋ฅ": 322, "๋ด": 323, "์ ": 324, "ํ": 325, "์ฐจ": 326, "๋ ": 327, "ํ ": 328, "์": 329, "์บ ": 330, "ํฌ": 331, "์ฌ": 332, "๋ฃฐ": 333, "์ผ": 334, "๋น": 335, "์ฑ
": 336, "์กฑ": 337, "ํ
": 338, "์ปฌ": 339, "ํ": 340, "ํ": 341, "๋": 342, "์": 343, "์ง": 344, "์ปด": 345, "์ผ": 346, "์": 347, "์ค": 348, "์ถ": 349, "๋น": 350, "์ฆ": 351, "๋": 352, "์กฐ": 353, "๋จ": 354, "๋": 355, "์ญ": 356, "์ด": 357, "๋นจ": 358, "๊พธ": 359, "๋ฐ": 360, "์ญ": 361, "๋กธ": 362, "์": 363, "์ก": 364, "๋ง": 365, "ํ": 366, "์": 367, "์ธ": 368, "๋ณด": 369, "๋": 370, "๋ก": 371, "์": 372, "์ฐ": 373, "ํฉ": 374, "๋ฉ": 375, "๋": 376, "๋ฐ": 377, "๋ธ": 378, "์ ": 379, "์ถ": 380, "๋ดค": 381, "์": 382, "์ซ": 383, "๊ด": 384, "์ต": 385, "๊ฐ": 386, "์ฉ": 387, "์ธ": 388, "๋ฉ": 389, "์": 390, "๋ฎฌ": 391, "๊ฒ": 392, "๊ต": 393, "ํด": 394, "๋": 395, "์": 396, "์ ": 397, "์": 398, "๋ฅธ": 399, "์": 400, "์": 401, "์": 402, "๋ฝ": 403, "์ฌ": 404, "ํ": 405, "์ ": 406, "์ง": 407, "๋ฟ": 408, "ํ": 409, "๊ผด": 410, "๋น ": 411, "๋กฑ": 412, "๋ฏ": 413, "๋ผ": 414, "์ ": 415, "ํ": 416, "๊ถ": 417, "๋": 418, "์น": 419, "๋ซ": 420, "๋ง": 421, "๋": 422, "๋": 423, "ํธ": 424, "์ฑ": 425, "๋ฃจ": 426, "ํท": 427, "์
": 428, "๋ฐ": 429, "๋ญ": 430, "์": 431, "์ผ": 432, "๋": 433, "์ผ": 434, "์": 435, "์": 436, "ํฝ": 437, "๋ณผ": 438, "์จ": 439, "ํต": 440, "์กด": 441, "์": 442, "๋": 443, "๊ท": 444, "์": 445, "๋ฃ": 446, "๊ตด": 447, "์": 448, "์ค": 449, "์ข
": 450, "๋ด": 451, "๋ชฉ": 452, "๊ฒฝ": 453, "๋จ": 454, "๋ฏธ": 455, "๋ฌ": 456, "๊ณ": 457, "๋": 458, "์ด": 459, "ํ": 460, "๋ฑ": 461, "ํ": 462, "๊ฐ": 463, "์": 464, "๋ท": 465, "๋ฒฝ": 466, "์": 467, "๋งบ": 468, "์ฐ": 469, "์ข": 470, "๊น": 471, "๋ฝ": 472, "์ง": 473, "์ฆ": 474, "๋": 475, "ํฌ": 476, "๋จผ": 477, "๊น": 478, "๊ฒฐ": 479, "์งธ": 480, "์": 481, "์ตธ": 482, "์ฉ": 483, "๊ตฐ": 484, "๋ง": 485, "๊ณ ": 486, "ํธ": 487, "์": 488, "์": 489, "ํ": 490, "ํผ": 491, "์จ": 492, "์น": 493, "์ฐพ": 494, "ํ
": 495, "๋ค": 496, "ํ": 497, "์ต": 498, "๋ธ": 499, "์ด": 500, "๋ฐ": 501, "ํ": 502, "์": 503, "๋": 504, "๋": 505, "๋ผ": 506, "ํจ": 507, "๋ฐ": 508, "์": 509, "ํ": 510, "๊ท ": 511, "์": 512, "์จ": 513, "๋ฅ": 514, "๊น": 515, "๊ฐ": 516, "๊ฐ": 517, "์ท": 518, "๊ฒจ": 519, "๋ฒ": 520, "ํญ": 521, "ํ": 522, "ํ": 523, "์ ": 524, "์ก": 525, "ํ": 526, "์": 527, "๋": 528, "๊ธ": 529, "์": 530, "ํด": 531, "๋ฉ": 532, "์ ": 533, "๋": 534, "์ด": 535, "๋ฐ": 536, "์ด": 537, "๋": 538, "๊ต": 539, "๋ซ": 540, "ํ": 541, "๋": 542, "ํ": 543, "ํฌ": 544, "๋ผ": 545, "๋ ฌ": 546, "๋ด": 547, "๋ฅผ": 548, "์": 549, "์ฝ": 550, "์": 551, "ํจ": 552, "๊ธ": 553, "๊ฒ ": 554, "์ฅ": 555, "ํ
": 556, "์ง": 557, "์ฏค": 558, "๊ธ": 559, "์ ": 560, "๊ด": 561, "๋จ": 562, "๋ฝ": 563, "์ต": 564, "์ค": 565, "์
": 566, "๋ค": 567, "ํฐ": 568, "๋ณธ": 569, "์ง": 570, "๋": 571, "ํ ": 572, "์": 573, "๊ฒ": 574, "์ด": 575, "์
": 576, "๋": 577, "์ปค": 578, "์ต": 579, "์ซ": 580, "๋ฆผ": 581, "์ทจ": 582, "๋น": 583, "ํ": 584, "์ผ": 585, "์ณ": 586, "๋": 587, "ํฅ": 588, "์ต": 589, "์": 590, "๊ฑ": 591, "ํผ": 592, "์": 593, "์": 594, "์ด": 595, "๋ฏ": 596, "์ ": 597, "๋": 598, "๋ฆฐ": 599, "๋": 600, "์": 601, "๋ช
": 602, "์ฌ": 603, "๊ณฑ": 604, "๋ผ": 605, "๊ฐธ": 606, "๊ณจ": 607, "๊ฒฌ": 608, "์ถ": 609, "๋": 610, "์นจ": 611, "๋": 612, "๋ฏ": 613, "๋": 614, "๋": 615, "์จ": 616, "๊ทน": 617, "ํ": 618, "๋": 619, "์ซ": 620, "์ ": 621, "์ฒซ": 622, "ํผ": 623, "์ค": 624, "์ชผ": 625, "๊ธธ": 626, "ํ": 627, "์ฒญ": 628, "์ก": 629, "ํ": 630, "๋ชป": 631, "์ณค": 632, "ํจ": 633, "์ค": 634, "๊ดด": 635, "ํฐ": 636, "์": 637, "๋ฌ": 638, "์ก": 639, "๋ฒ": 640, "ํ": 641, "๋": 642, "๊ธ": 643, "์
": 644, "์ข": 645, "๊ฟ": 646, "์ฆ": 647, "๊ฐ": 648, "์ข": 649, "|": 239, "[UNK]": 650, "[PAD]": 651}
|