whisper-large-v2

#83
by pingqw - opened
generation_config.json CHANGED
@@ -212,11 +212,10 @@
212
  "<|yo|>": 50325,
213
  "<|zh|>": 50260
214
  },
215
- "max_initial_timestamp_index": 50,
216
  "max_length": 448,
217
  "no_timestamps_token_id": 50363,
218
  "pad_token_id": 50257,
219
- "prev_sot_token_id": 50361,
220
  "return_timestamps": false,
221
  "suppress_tokens": [
222
  1,
 
212
  "<|yo|>": 50325,
213
  "<|zh|>": 50260
214
  },
215
+ "max_initial_timestamp_index": 1,
216
  "max_length": 448,
217
  "no_timestamps_token_id": 50363,
218
  "pad_token_id": 50257,
 
219
  "return_timestamps": false,
220
  "suppress_tokens": [
221
  1,
merges.txt CHANGED
@@ -1,5 +1,4 @@
1
  #version: 0.2
2
- Ġ t
3
  Ġ a
4
  Ġt h
5
  i n
 
1
  #version: 0.2
 
2
  Ġ a
3
  Ġt h
4
  i n
special_tokens_map.json CHANGED
@@ -111,28 +111,22 @@
111
  "bos_token": {
112
  "content": "<|endoftext|>",
113
  "lstrip": false,
114
- "normalized": false,
115
  "rstrip": false,
116
  "single_word": false
117
  },
118
  "eos_token": {
119
  "content": "<|endoftext|>",
120
  "lstrip": false,
121
- "normalized": false,
122
- "rstrip": false,
123
- "single_word": false
124
- },
125
- "pad_token": {
126
- "content": "<|endoftext|>",
127
- "lstrip": false,
128
- "normalized": false,
129
  "rstrip": false,
130
  "single_word": false
131
  },
 
132
  "unk_token": {
133
  "content": "<|endoftext|>",
134
  "lstrip": false,
135
- "normalized": false,
136
  "rstrip": false,
137
  "single_word": false
138
  }
 
111
  "bos_token": {
112
  "content": "<|endoftext|>",
113
  "lstrip": false,
114
+ "normalized": true,
115
  "rstrip": false,
116
  "single_word": false
117
  },
118
  "eos_token": {
119
  "content": "<|endoftext|>",
120
  "lstrip": false,
121
+ "normalized": true,
 
 
 
 
 
 
 
122
  "rstrip": false,
123
  "single_word": false
124
  },
125
+ "pad_token": "<|endoftext|>",
126
  "unk_token": {
127
  "content": "<|endoftext|>",
128
  "lstrip": false,
129
+ "normalized": true,
130
  "rstrip": false,
131
  "single_word": false
132
  }
tokenizer.json CHANGED
@@ -64848,7 +64848,6 @@
64848
  "<|endoftext|>": 50257
64849
  },
64850
  "merges": [
64851
- "Ġ t",
64852
  "Ġ a",
64853
  "Ġt h",
64854
  "i n",
 
64848
  "<|endoftext|>": 50257
64849
  },
64850
  "merges": [
 
64851
  "Ġ a",
64852
  "Ġt h",
64853
  "i n",
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
vocab.json CHANGED
The diff for this file is too large to render. See raw diff