tsrivatsav committed on
Commit
d797350
1 Parent(s): f2f6f68

add tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 30,
3
- "<s>": 29
4
  }
 
1
  {
2
+ "</s>": 29,
3
+ "<s>": 28
4
  }
runs/Jul21_15-05-08_Tejas-XPS22/1658430356.7334583/events.out.tfevents.1658430356.Tejas-XPS22.1656.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6effd257ea22f1c6637a0fc79ee01673fcc008dc5d43eba29a96440710629916
3
+ size 5385
runs/Jul21_15-05-08_Tejas-XPS22/events.out.tfevents.1658430356.Tejas-XPS22.1656.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1a6972ea857ef8e2fecbe511b8a20a4b000442d7e75ed44356b07aebf3389b5
3
+ size 5092
special_tokens_map.json CHANGED
@@ -18,5 +18,5 @@
18
  "bos_token": "<s>",
19
  "eos_token": "</s>",
20
  "pad_token": "[PAD]",
21
- "unk_token": "[UNK]"
22
  }
 
18
  "bos_token": "<s>",
19
  "eos_token": "</s>",
20
  "pad_token": "[PAD]",
21
+ "unk_token": "<unk>"
22
  }
tokenizer_config.json CHANGED
@@ -7,6 +7,6 @@
7
  "replace_word_delimiter_char": " ",
8
  "special_tokens_map_file": null,
9
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
10
- "unk_token": "[UNK]",
11
  "word_delimiter_token": " "
12
  }
 
7
  "replace_word_delimiter_char": " ",
8
  "special_tokens_map_file": null,
9
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
10
+ "unk_token": "<unk>",
11
  "word_delimiter_token": " "
12
  }
vocab.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
  " ": 0,
3
- "[PAD]": 28,
4
- "[UNK]": 27,
5
  "a": 1,
6
  "b": 2,
7
  "c": 3,
 
1
  {
2
  " ": 0,
3
+ "[PAD]": 27,
 
4
  "a": 1,
5
  "b": 2,
6
  "c": 3,