# chipewyan
# file path change
# 92cfe12
from transformers import Wav2Vec2CTCTokenizer, Wav2Vec2FeatureExtractor, Wav2Vec2Processor, Wav2Vec2ForCTC, TrainingArguments, Trainer
import argparse
def upload(model_path, vocab_file, directory):
    """
    Load a Wav2Vec2 CTC model, build its tokenizer, and save both to `directory`.

    Running this function will create additional necessary files such as
    `special_tokens_map.json` and make the model ready to be uploaded to the
    huggingface repository.

    Args:
        model_path: Model (checkpoint) directory path, passed to
            `Wav2Vec2ForCTC.from_pretrained`.
        vocab_file: Path of the vocab file handed to `Wav2Vec2CTCTokenizer`.
            It is used exactly as given, so both relative and absolute
            paths are accepted.
        directory: Destination passed to `save_pretrained` for both the
            model and the tokenizer.
    """
    print("Reading the model...")
    model = Wav2Vec2ForCTC.from_pretrained(model_path)
    print("Model read")

    print("Reading the tokenizer...")
    # Pass the path through untouched: prefixing it with "./" would turn an
    # absolute path like "/data/vocab.json" into ".//data/vocab.json", which
    # is looked up relative to the current working directory.
    tokenizer_sentence = Wav2Vec2CTCTokenizer(
        vocab_file,
        unk_token="[UNK]",
        pad_token="[PAD]",
        word_delimiter_token="|",
    )
    print("Tokenizer read")

    print("Saving the model...")
    model.save_pretrained(directory)
    print("Model saved")

    print("Saving the tokenizer...")
    tokenizer_sentence.save_pretrained(directory)
    print("Tokenizer saved")
if __name__ == "__main__":
    # Command-line entry point: collect the three paths and hand them to upload().
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-m", "--model_path",
        default="./",
        type=str,
        help="Model (checkpoint) directory path",
    )
    cli.add_argument(
        "-v", "--vocab_file",
        required=True,
        type=str,
        help="Vocab file path",
    )
    cli.add_argument(
        "-d", "--directory",
        default="./",
        type=str,
        help="Destination of the saved model",
    )
    options = cli.parse_args()

    upload(
        model_path=options.model_path,
        vocab_file=options.vocab_file,
        directory=options.directory,
    )