ArthurZ HF staff committed on
Commit
29092c4
1 Parent(s): fdf7796

Upload processor

Browse files
Files changed (4) hide show
  1. added_tokens.json +106 -106
  2. merges.txt +0 -0
  3. tokenizer_config.json +1 -1
  4. vocab.json +0 -0
added_tokens.json CHANGED
@@ -1,108 +1,108 @@
1
  {
2
- "<|af|>": 50327,
3
- "<|am|>": 50334,
4
- "<|ar|>": 50272,
5
- "<|as|>": 50350,
6
- "<|az|>": 50304,
7
- "<|ba|>": 50355,
8
- "<|be|>": 50330,
9
- "<|bg|>": 50292,
10
- "<|bn|>": 50302,
11
- "<|bo|>": 50347,
12
- "<|br|>": 50309,
13
- "<|bs|>": 50315,
14
- "<|ca|>": 50270,
15
- "<|cs|>": 50283,
16
- "<|cy|>": 50297,
17
- "<|da|>": 50285,
18
- "<|de|>": 50261,
19
- "<|el|>": 50281,
20
- "<|en|>": 50259,
21
- "<|es|>": 50262,
22
- "<|et|>": 50307,
23
- "<|eu|>": 50310,
24
- "<|fa|>": 50300,
25
- "<|fi|>": 50277,
26
- "<|fo|>": 50338,
27
- "<|fr|>": 50265,
28
- "<|gl|>": 50319,
29
- "<|gu|>": 50333,
30
- "<|haw|>": 50352,
31
- "<|ha|>": 50354,
32
- "<|hi|>": 50276,
33
- "<|hr|>": 50291,
34
- "<|ht|>": 50339,
35
- "<|hu|>": 50286,
36
- "<|hy|>": 50312,
37
- "<|id|>": 50275,
38
- "<|is|>": 50311,
39
- "<|it|>": 50274,
40
- "<|iw|>": 50279,
41
- "<|ja|>": 50266,
42
- "<|jw|>": 50356,
43
- "<|ka|>": 50329,
44
- "<|kk|>": 50316,
45
- "<|km|>": 50323,
46
- "<|kn|>": 50306,
47
- "<|ko|>": 50264,
48
- "<|la|>": 50294,
49
- "<|lb|>": 50345,
50
- "<|ln|>": 50353,
51
- "<|lo|>": 50336,
52
- "<|lt|>": 50293,
53
- "<|lv|>": 50301,
54
- "<|mg|>": 50349,
55
- "<|mi|>": 50295,
56
- "<|mk|>": 50308,
57
- "<|ml|>": 50296,
58
- "<|mn|>": 50314,
59
- "<|mr|>": 50320,
60
- "<|ms|>": 50282,
61
- "<|mt|>": 50343,
62
- "<|my|>": 50346,
63
- "<|ne|>": 50313,
64
- "<|nl|>": 50271,
65
- "<|nn|>": 50342,
66
- "<|nocaptions|>": 50362,
67
- "<|notimestamps|>": 50363,
68
- "<|no|>": 50288,
69
- "<|oc|>": 50328,
70
- "<|pa|>": 50321,
71
- "<|pl|>": 50269,
72
- "<|ps|>": 50340,
73
- "<|pt|>": 50267,
74
- "<|ro|>": 50284,
75
- "<|ru|>": 50263,
76
- "<|sa|>": 50344,
77
- "<|sd|>": 50332,
78
- "<|si|>": 50322,
79
- "<|sk|>": 50298,
80
- "<|sl|>": 50305,
81
- "<|sn|>": 50324,
82
- "<|so|>": 50326,
83
- "<|sq|>": 50317,
84
- "<|sr|>": 50303,
85
- "<|startoflm|>": 50360,
86
- "<|startofprev|>": 50361,
87
- "<|startoftranscript|>": 50258,
88
- "<|su|>": 50357,
89
- "<|sv|>": 50273,
90
- "<|sw|>": 50318,
91
- "<|ta|>": 50287,
92
- "<|te|>": 50299,
93
- "<|tg|>": 50331,
94
- "<|th|>": 50289,
95
- "<|tk|>": 50341,
96
- "<|tl|>": 50348,
97
- "<|transcribe|>": 50359,
98
- "<|translate|>": 50358,
99
- "<|tr|>": 50268,
100
- "<|tt|>": 50351,
101
- "<|uk|>": 50280,
102
- "<|ur|>": 50290,
103
- "<|uz|>": 50337,
104
- "<|vi|>": 50278,
105
- "<|yi|>": 50335,
106
- "<|yo|>": 50325,
107
- "<|zh|>": 50260
108
  }
 
1
  {
2
+ "<|af|>": 50326,
3
+ "<|am|>": 50333,
4
+ "<|ar|>": 50271,
5
+ "<|as|>": 50349,
6
+ "<|az|>": 50303,
7
+ "<|ba|>": 50354,
8
+ "<|be|>": 50329,
9
+ "<|bg|>": 50291,
10
+ "<|bn|>": 50301,
11
+ "<|bo|>": 50346,
12
+ "<|br|>": 50308,
13
+ "<|bs|>": 50314,
14
+ "<|ca|>": 50269,
15
+ "<|cs|>": 50282,
16
+ "<|cy|>": 50296,
17
+ "<|da|>": 50284,
18
+ "<|de|>": 50260,
19
+ "<|el|>": 50280,
20
+ "<|en|>": 50258,
21
+ "<|es|>": 50261,
22
+ "<|et|>": 50306,
23
+ "<|eu|>": 50309,
24
+ "<|fa|>": 50299,
25
+ "<|fi|>": 50276,
26
+ "<|fo|>": 50337,
27
+ "<|fr|>": 50264,
28
+ "<|gl|>": 50318,
29
+ "<|gu|>": 50332,
30
+ "<|haw|>": 50351,
31
+ "<|ha|>": 50353,
32
+ "<|hi|>": 50275,
33
+ "<|hr|>": 50290,
34
+ "<|ht|>": 50338,
35
+ "<|hu|>": 50285,
36
+ "<|hy|>": 50311,
37
+ "<|id|>": 50274,
38
+ "<|is|>": 50310,
39
+ "<|it|>": 50273,
40
+ "<|iw|>": 50278,
41
+ "<|ja|>": 50265,
42
+ "<|jw|>": 50355,
43
+ "<|ka|>": 50328,
44
+ "<|kk|>": 50315,
45
+ "<|km|>": 50322,
46
+ "<|kn|>": 50305,
47
+ "<|ko|>": 50263,
48
+ "<|la|>": 50293,
49
+ "<|lb|>": 50344,
50
+ "<|ln|>": 50352,
51
+ "<|lo|>": 50335,
52
+ "<|lt|>": 50292,
53
+ "<|lv|>": 50300,
54
+ "<|mg|>": 50348,
55
+ "<|mi|>": 50294,
56
+ "<|mk|>": 50307,
57
+ "<|ml|>": 50295,
58
+ "<|mn|>": 50313,
59
+ "<|mr|>": 50319,
60
+ "<|ms|>": 50281,
61
+ "<|mt|>": 50342,
62
+ "<|my|>": 50345,
63
+ "<|ne|>": 50312,
64
+ "<|nl|>": 50270,
65
+ "<|nn|>": 50341,
66
+ "<|nocaptions|>": 50361,
67
+ "<|notimestamps|>": 50362,
68
+ "<|no|>": 50287,
69
+ "<|oc|>": 50327,
70
+ "<|pa|>": 50320,
71
+ "<|pl|>": 50268,
72
+ "<|ps|>": 50339,
73
+ "<|pt|>": 50266,
74
+ "<|ro|>": 50283,
75
+ "<|ru|>": 50262,
76
+ "<|sa|>": 50343,
77
+ "<|sd|>": 50331,
78
+ "<|si|>": 50321,
79
+ "<|sk|>": 50297,
80
+ "<|sl|>": 50304,
81
+ "<|sn|>": 50323,
82
+ "<|so|>": 50325,
83
+ "<|sq|>": 50316,
84
+ "<|sr|>": 50302,
85
+ "<|startoflm|>": 50359,
86
+ "<|startofprev|>": 50360,
87
+ "<|startoftranscript|>": 50257,
88
+ "<|su|>": 50356,
89
+ "<|sv|>": 50272,
90
+ "<|sw|>": 50317,
91
+ "<|ta|>": 50286,
92
+ "<|te|>": 50298,
93
+ "<|tg|>": 50330,
94
+ "<|th|>": 50288,
95
+ "<|tk|>": 50340,
96
+ "<|tl|>": 50347,
97
+ "<|transcribe|>": 50358,
98
+ "<|translate|>": 50357,
99
+ "<|tr|>": 50267,
100
+ "<|tt|>": 50350,
101
+ "<|uk|>": 50279,
102
+ "<|ur|>": 50289,
103
+ "<|uz|>": 50336,
104
+ "<|vi|>": 50277,
105
+ "<|yi|>": 50334,
106
+ "<|yo|>": 50324,
107
+ "<|zh|>": 50259
108
  }
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -19,7 +19,7 @@
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
22
- "name_or_path": "ArthurZ/whisper-small.en",
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "special_tokens_map_file": null,
 
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
22
+ "name_or_path": "whisper-any.en",
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "special_tokens_map_file": null,
vocab.json CHANGED
The diff for this file is too large to render. See raw diff