huseinzol05 committed on
Commit
81bca0f
1 Parent(s): 4997796

Upload processor

Browse files
preprocessor_config.json CHANGED
@@ -1,23 +1,14 @@
1
  {
2
- "do_normalize": true,
3
- "do_rescale": true,
4
- "do_resize": true,
5
- "image_mean": [
6
- 0.5,
7
- 0.5,
8
- 0.5
9
- ],
10
- "image_processor_type": "SiglipImageProcessor",
11
- "image_std": [
12
- 0.5,
13
- 0.5,
14
- 0.5
15
- ],
16
- "processor_class": "SiglipProcessor",
17
- "resample": 3,
18
- "rescale_factor": 0.00392156862745098,
19
- "size": {
20
- "height": 384,
21
- "width": 384
22
- }
23
  }
 
1
  {
2
+ "chunk_length": 30,
3
+ "feature_extractor_type": "WhisperFeatureExtractor",
4
+ "feature_size": 80,
5
+ "hop_length": 160,
6
+ "n_fft": 400,
7
+ "n_samples": 480000,
8
+ "nb_max_frames": 3000,
9
+ "padding_side": "right",
10
+ "padding_value": 0.0,
11
+ "processor_class": "WhisperProcessor",
12
+ "return_attention_mask": false,
13
+ "sampling_rate": 16000
 
 
 
 
 
 
 
 
 
14
  }
special_tokens_map.json CHANGED
@@ -1,23 +1,139 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "eos_token": {
3
- "content": "</s>",
4
- "lstrip": true,
5
  "normalized": false,
6
- "rstrip": true,
7
  "single_word": false
8
  },
9
  "pad_token": {
10
- "content": "</s>",
11
- "lstrip": true,
12
  "normalized": false,
13
- "rstrip": true,
14
  "single_word": false
15
  },
16
  "unk_token": {
17
- "content": "<unk>",
18
- "lstrip": true,
19
  "normalized": false,
20
- "rstrip": true,
21
  "single_word": false
22
  }
23
  }
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<|endoftext|>",
4
+ "<|startoftranscript|>",
5
+ "<|en|>",
6
+ "<|zh|>",
7
+ "<|de|>",
8
+ "<|es|>",
9
+ "<|ru|>",
10
+ "<|ko|>",
11
+ "<|fr|>",
12
+ "<|ja|>",
13
+ "<|pt|>",
14
+ "<|tr|>",
15
+ "<|pl|>",
16
+ "<|ca|>",
17
+ "<|nl|>",
18
+ "<|ar|>",
19
+ "<|sv|>",
20
+ "<|it|>",
21
+ "<|id|>",
22
+ "<|hi|>",
23
+ "<|fi|>",
24
+ "<|vi|>",
25
+ "<|he|>",
26
+ "<|uk|>",
27
+ "<|el|>",
28
+ "<|ms|>",
29
+ "<|cs|>",
30
+ "<|ro|>",
31
+ "<|da|>",
32
+ "<|hu|>",
33
+ "<|ta|>",
34
+ "<|no|>",
35
+ "<|th|>",
36
+ "<|ur|>",
37
+ "<|hr|>",
38
+ "<|bg|>",
39
+ "<|lt|>",
40
+ "<|la|>",
41
+ "<|mi|>",
42
+ "<|ml|>",
43
+ "<|cy|>",
44
+ "<|sk|>",
45
+ "<|te|>",
46
+ "<|fa|>",
47
+ "<|lv|>",
48
+ "<|bn|>",
49
+ "<|sr|>",
50
+ "<|az|>",
51
+ "<|sl|>",
52
+ "<|kn|>",
53
+ "<|et|>",
54
+ "<|mk|>",
55
+ "<|br|>",
56
+ "<|eu|>",
57
+ "<|is|>",
58
+ "<|hy|>",
59
+ "<|ne|>",
60
+ "<|mn|>",
61
+ "<|bs|>",
62
+ "<|kk|>",
63
+ "<|sq|>",
64
+ "<|sw|>",
65
+ "<|gl|>",
66
+ "<|mr|>",
67
+ "<|pa|>",
68
+ "<|si|>",
69
+ "<|km|>",
70
+ "<|sn|>",
71
+ "<|yo|>",
72
+ "<|so|>",
73
+ "<|af|>",
74
+ "<|oc|>",
75
+ "<|ka|>",
76
+ "<|be|>",
77
+ "<|tg|>",
78
+ "<|sd|>",
79
+ "<|gu|>",
80
+ "<|am|>",
81
+ "<|yi|>",
82
+ "<|lo|>",
83
+ "<|uz|>",
84
+ "<|fo|>",
85
+ "<|ht|>",
86
+ "<|ps|>",
87
+ "<|tk|>",
88
+ "<|nn|>",
89
+ "<|mt|>",
90
+ "<|sa|>",
91
+ "<|lb|>",
92
+ "<|my|>",
93
+ "<|bo|>",
94
+ "<|tl|>",
95
+ "<|mg|>",
96
+ "<|as|>",
97
+ "<|tt|>",
98
+ "<|haw|>",
99
+ "<|ln|>",
100
+ "<|ha|>",
101
+ "<|ba|>",
102
+ "<|jw|>",
103
+ "<|su|>",
104
+ "<|translate|>",
105
+ "<|transcribe|>",
106
+ "<|startoflm|>",
107
+ "<|startofprev|>",
108
+ "<|nocaptions|>",
109
+ "<|notimestamps|>"
110
+ ],
111
+ "bos_token": {
112
+ "content": "<|endoftext|>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
  "eos_token": {
119
+ "content": "<|endoftext|>",
120
+ "lstrip": false,
121
  "normalized": false,
122
+ "rstrip": false,
123
  "single_word": false
124
  },
125
  "pad_token": {
126
+ "content": "<|endoftext|>",
127
+ "lstrip": false,
128
  "normalized": false,
129
+ "rstrip": false,
130
  "single_word": false
131
  },
132
  "unk_token": {
133
+ "content": "<|endoftext|>",
134
+ "lstrip": false,
135
  "normalized": false,
136
+ "rstrip": false,
137
  "single_word": false
138
  }
139
  }
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff