from typing import Any

import pytorch_lightning as pl
from torch.utils.data import random_split
from transformers import (
    AutoModelForAudioClassification,
    AutoProcessor,
    Trainer,
    TrainingArguments,
)

from preprocessing.dataset import HuggingFaceDatasetWrapper, get_datasets
from preprocessing.pipelines import WaveformTrainingPipeline

from .utils import compute_hf_metrics, get_id_label_mapping
# Fine-tuned wav2vec2 checkpoint used as the starting point for training.
MODEL_CHECKPOINT = "yuval6967/wav2vec2-base-finetuned-gtzan"
# Base checkpoint whose processor handles waveform feature extraction.
PROCESSOR_CHECKPOINT = "facebook/wav2vec2-base"
class Wav2VecFeatureExtractor:
    """Chains the project's waveform preprocessing with the wav2vec2 processor."""

    def __init__(self) -> None:
        self.waveform_pipeline = WaveformTrainingPipeline()
        self.feature_extractor = AutoProcessor.from_pretrained(PROCESSOR_CHECKPOINT)

    def __call__(self, waveform) -> Any:
        waveform = self.waveform_pipeline(waveform)
        # wav2vec2 expects mono 16 kHz audio.
        return self.feature_extractor(waveform.squeeze(0), sampling_rate=16000)

    def __getattr__(self, attr):
        # Delegate unknown attribute lookups to the underlying processor so
        # this wrapper can stand in for it wherever a processor is expected.
        return getattr(self.feature_extractor, attr)
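
# Illustrative use (assumes WaveformTrainingPipeline yields a (1, n) float
# waveform tensor at 16 kHz; the result is a dict-like BatchFeature whose
# "input_values" entry feeds the model):
#   extractor = Wav2VecFeatureExtractor()
#   features = extractor(torch.randn(1, 16_000))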
def train_huggingface(config: dict):
    TARGET_CLASSES = config["dance_ids"]
    DEVICE = config["device"]
    SEED = config["seed"]
    OUTPUT_DIR = "models/weights/wav2vec2"
    batch_size = config["data_module"]["batch_size"]
    epochs = config["trainer"]["min_epochs"]
    test_proportion = config["data_module"].get("test_proportion", 0.2)

    pl.seed_everything(SEED, workers=True)
    feature_extractor = Wav2VecFeatureExtractor()
    dataset = get_datasets(config["datasets"], feature_extractor)
    dataset = HuggingFaceDatasetWrapper(dataset)
    id2label, label2id = get_id_label_mapping(TARGET_CLASSES)

    # Fractional split lengths require torch >= 1.13.
    train_proportion = 1 - test_proportion
    train_ds, test_ds = random_split(dataset, [train_proportion, test_proportion])
    model = AutoModelForAudioClassification.from_pretrained(
        MODEL_CHECKPOINT,
        num_labels=len(TARGET_CLASSES),
        label2id=label2id,
        id2label=id2label,
        # The checkpoint was fine-tuned on a different label set (GTZAN genres),
        # so let from_pretrained reinitialize the classification head.
        ignore_mismatched_sizes=True,
    ).to(DEVICE)
    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        learning_rate=3e-5,
        per_device_train_batch_size=batch_size,
        # Effective train batch size is batch_size * 5; gradient checkpointing
        # trades recomputation for lower memory use.
        gradient_accumulation_steps=5,
        gradient_checkpointing=True,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=epochs,
        warmup_ratio=0.1,
        logging_steps=10,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        push_to_hub=False,
        use_mps_device=DEVICE == "mps",
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=test_ds,
        compute_metrics=compute_hf_metrics,
    )
    trainer.train()
    return model
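
# Illustrative call site (assumed, not part of the original source). The keys
# mirror the lookups in train_huggingface; the values and class names are
# placeholders, and the "datasets" entry must match what get_datasets expects:
#
#   config = {
#       "dance_ids": ["waltz", "tango"],  # hypothetical class names
#       "device": "cpu",
#       "seed": 42,
#       "datasets": {...},  # project-specific dataset spec
#       "data_module": {"batch_size": 8, "test_proportion": 0.2},
#       "trainer": {"min_epochs": 3},
#   }
#   model = train_huggingface(config)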