aoxo committed on
Commit
1a7c2c6
1 Parent(s): ae42d95

Create push_to_hub.py

Browse files
Files changed (1) hide show
  1. push_to_hub.py +46 -0
push_to_hub.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Upload a locally stored Wav2Vec2 CTC model to the Hugging Face Hub.

The script builds a tokenizer from a local ``vocab.json``, creates a feature
extractor, wraps both in a ``Wav2Vec2Processor``, loads the model weights from
a local directory, and pushes model, processor, and tokenizer to ``model_id``.

Running it performs network I/O.
NOTE(review): presumably requires an authenticated Hugging Face session with
write access to ``model_id`` -- confirm before running.
"""
import json

from transformers import (
    Wav2Vec2CTCTokenizer,
    Wav2Vec2FeatureExtractor,
    Wav2Vec2ForCTC,
    Wav2Vec2Processor,
)

# Path to your local model directory and vocab file
local_model_path = './wav2vec2-base-mal'  # Directory with model checkpoints
vocab_path = './vocab.json'  # Path to your vocab.json file

# Hugging Face model ID (replace with your username)
model_id = "aoxo/wav2vec2-base-mal"

# Load the vocab up front so a missing or malformed vocab file fails here,
# before the model is loaded. The encoding is given explicitly so that the
# result does not depend on the platform's default text encoding.
with open(vocab_path, 'r', encoding="utf-8") as f:
    vocab_dict = json.load(f)

# Create custom tokenizer from the vocab file, naming the special tokens
tokenizer = Wav2Vec2CTCTokenizer(
    vocab_path,
    unk_token="[UNK]",
    pad_token="[PAD]",
    word_delimiter_token="|",
)

# Create feature extractor
feature_extractor = Wav2Vec2FeatureExtractor(
    feature_size=1,
    sampling_rate=16000,
    padding_value=0.0,
    do_normalize=True,
    return_attention_mask=False,
)

# Create processor bundling the feature extractor and the tokenizer
processor = Wav2Vec2Processor(
    feature_extractor=feature_extractor,
    tokenizer=tokenizer,
)

# Load the model from the checkpoint directory
model = Wav2Vec2ForCTC.from_pretrained(local_model_path)

# Push to Hugging Face Hub
# NOTE(review): the processor push is expected to upload the tokenizer files
# as well; the explicit tokenizer push is kept to preserve the original
# behavior -- confirm whether it is still needed.
model.push_to_hub(model_id)
processor.push_to_hub(model_id)
tokenizer.push_to_hub(model_id)

print(f"Model, processor, and tokenizer successfully pushed to {model_id}")