ydshieh
/

test-dynamic-processor

Model card Files Files and versions Community

ydshieh HF staff committed on Jan 10

Commit

990cd1d

•

1 Parent(s): 02052e9

commit files to HF hub

Files changed (8) hide show

custom_feature_extraction.py +5 -0
custom_processing.py +6 -0
custom_tokenization.py +5 -0
preprocessor_config.json +14 -0
processor_config.json +6 -0
special_tokens_map.json +7 -0
tokenizer_config.json +65 -0
vocab.txt +7 -0

custom_feature_extraction.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from transformers import Wav2Vec2FeatureExtractor
+class CustomFeatureExtractor(Wav2Vec2FeatureExtractor):
+    pass

custom_processing.py ADDED Viewed

	@@ -0,0 +1,6 @@

+from transformers import ProcessorMixin
+class CustomProcessor(ProcessorMixin):
+    feature_extractor_class = "AutoFeatureExtractor"
+    tokenizer_class = "AutoTokenizer"

custom_tokenization.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from transformers import BertTokenizer
+class CustomTokenizer(BertTokenizer):
+    pass

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "auto_map": {
+    "AutoFeatureExtractor": "custom_feature_extraction.CustomFeatureExtractor",
+    "AutoProcessor": "custom_processing.CustomProcessor"
+  },
+  "do_normalize": true,
+  "feature_extractor_type": "CustomFeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "CustomProcessor",
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

processor_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "auto_map": {
+    "AutoProcessor": "custom_processing.CustomProcessor"
+  },
+  "processor_class": "CustomProcessor"
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,65 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "auto_map": {
+    "AutoProcessor": "custom_processing.CustomProcessor",
+    "AutoTokenizer": [
+      "custom_tokenization.CustomTokenizer",
+      null
+    ]
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "processor_class": "CustomProcessor",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "CustomTokenizer",
+  "unk_token": "[UNK]"
+}

vocab.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+[UNK]
+[CLS]
+[SEP]
+[PAD]
+[MASK]
+bla
+blou