from transformers import Wav2Vec2ForCTC, Wav2Vec2CTCTokenizer, Wav2Vec2FeatureExtractor, Wav2Vec2Processor
import json

# Path to your local model directory and vocab file
local_model_path = './wav2vec2-large-mal'  # Directory with model checkpoints
vocab_path = './vocab.json'  # Path to your vocab.json file

# Hugging Face model ID (replace with your username)
model_id = "aoxo/wav2vec2-large-mal"

# Load the vocab (UTF-8 — the vocab contains non-ASCII Malayalam characters)
# and fail fast if any special token the tokenizer is configured with below
# is missing; otherwise the mismatch would only surface later at decode time.
with open(vocab_path, 'r', encoding='utf-8') as f:
    vocab_dict = json.load(f)
for required_token in ("[UNK]", "[PAD]", "|"):
    if required_token not in vocab_dict:
        raise ValueError(f"{vocab_path} is missing required token {required_token!r}")

# Create custom CTC tokenizer backed by the validated vocab file.
tokenizer = Wav2Vec2CTCTokenizer(
    vocab_path,
    unk_token="[UNK]",
    pad_token="[PAD]",
    word_delimiter_token="|",
)

# Feature extractor: raw mono 16 kHz audio, zero-mean/unit-variance normalized.
feature_extractor = Wav2Vec2FeatureExtractor(
    feature_size=1,
    sampling_rate=16000,
    padding_value=0.0,
    do_normalize=True,
    return_attention_mask=False,
)

# Processor bundles the feature extractor and tokenizer into one object.
processor = Wav2Vec2Processor(
    feature_extractor=feature_extractor,
    tokenizer=tokenizer,
)

# Load the fine-tuned model weights from the local checkpoint directory.
model = Wav2Vec2ForCTC.from_pretrained(local_model_path)

# Push to Hugging Face Hub. NOTE(review): the processor push already uploads
# the tokenizer files; the separate tokenizer push is kept to preserve the
# original upload behavior byte-for-byte.
model.push_to_hub(model_id)
processor.push_to_hub(model_id)
tokenizer.push_to_hub(model_id)

print(f"Model, processor, and tokenizer successfully pushed to {model_id}")