Spaces:

supun9
/

audio-sentiment-analysis

Configuration error

File size: 6,861 Bytes

b7f4dbe

import os
import logging
import librosa

import wandb
import numpy as np

from datasets import DatasetDict, load_dataset, load_metric
from transformers import (
    HubertForSequenceClassification,
    PretrainedConfig,
    Trainer,
    TrainingArguments,
    Wav2Vec2FeatureExtractor,
)
from utils import collator

# Timestamped, level-tagged log lines at INFO and above.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s: %(message)s",
)

# The project root is the parent of the directory that contains this script.
_SCRIPT_DIR = os.path.dirname(__file__)
PROJECT_ROOT = os.path.abspath(os.path.join(_SCRIPT_DIR, os.pardir))

# Number of target classes handed to the model config as `num_labels`.
NUM_LABELS = 6


# Weights & Biases run setup; both placeholders must be filled in before use.
USER = "XXXX"  # TODO: replace with your username
WANDB_PROJECT = "XXXXX"  # TODO: replace with your project name
wandb.init(project=WANDB_PROJECT, entity=USER)


# PROCESS THE DATASET TO THE FORMAT EXPECTED BY THE MODEL FOR TRAINING
PreTrainedFeatureExtractor = "SequenceFeatureExtractor"  # noqa: F821

INPUT_FIELD = "input_values"
LABEL_FIELD = "labels"


def prepare_dataset(
    batch, feature_extractor: PreTrainedFeatureExtractor, sampling_rate: int = 16000
):
    """Add model-ready input values and labels to one dataset example.

    Args:
        batch: Mapping with an "array" entry (the raw audio samples) and a
            "label" entry. It is updated in place.
        feature_extractor: Callable applied to the audio; its result must
            expose an ``input_values`` attribute.
        sampling_rate: Sampling rate reported to the extractor. Defaults to
            16000, the value that used to be hard-coded here.

    Returns:
        The same ``batch`` object with INPUT_FIELD and LABEL_FIELD filled in.
    """
    # Named `features` rather than `input` so the builtin is not shadowed.
    features = feature_extractor(
        batch["array"],
        sampling_rate=sampling_rate,
        padding=True,
        return_tensors="pt",
    )

    # Only the first entry of the extractor output is kept for this example.
    batch[INPUT_FIELD] = features.input_values[0]
    # colname MUST be labels as Trainer will look for it by default
    batch[LABEL_FIELD] = batch["label"]

    return batch


model_id = "facebook/hubert-base-ls960"
MODELS_DIR = os.path.join(PROJECT_ROOT, "models")


def _local_or_hub(subdir):
    """Return MODELS_DIR/<subdir> if that directory exists, else the hub id."""
    local_copy = os.path.join(MODELS_DIR, subdir)
    return local_copy if os.path.isdir(local_copy) else model_id


# Each artifact is looked up on its own. Testing only whether MODELS_DIR is
# empty fails when the directory does not exist yet, and it selects local
# paths that may be missing as soon as anything else (for example the trained
# model under "models/audio-model") has been written there.
extractor_path = _local_or_hub("feature_extractor")
model_path = _local_or_hub("pretrained_model")

feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(extractor_path)

# Load the configuration through the model's own config class instead of the
# generic PretrainedConfig base, so the Hubert-specific defaults and the
# model type stay attached to the config the classifier is built from.
config = HubertForSequenceClassification.config_class.from_pretrained(
    model_path, num_labels=NUM_LABELS
)
hubert_model = HubertForSequenceClassification.from_pretrained(
    model_path,
    config=config,  # because we need to update num_labels as per our dataset
    ignore_mismatched_sizes=True,  # to avoid classifier size mismatch from from_pretrained.
)


# FREEZE LAYERS

# Start with every parameter frozen.
hubert_model.requires_grad_(False)

# Re-enable gradients for the trailing parameters only. The count is
# 4 (projector and classifier weights and biases) plus 16 parameters for each
# of the last `layers_freeze_num` layers.
layers_freeze_num = 2
n_layers = 4 + layers_freeze_num * 16
trainable_tail = list(hubert_model.named_parameters())[-n_layers:]
for name, param in trainable_tail:
    param.requires_grad = True

# # freeze model weights for all layers except projector and classifier
# for name, param in hubert_model.named_parameters():
#     if any(ext in name for ext in ["projector", "classifier"]):
#         param.requires_grad = True


# Hyper-parameters and output locations consumed by the training setup.
trainer_config = dict(
    OUTPUT_DIR="results",
    TRAIN_EPOCHS=5,
    TRAIN_BATCH_SIZE=32,
    EVAL_BATCH_SIZE=32,
    GRADIENT_ACCUMULATION_STEPS=4,
    WARMUP_STEPS=500,
    DECAY=0.01,
    LOGGING_STEPS=10,
    MODEL_DIR="models/audio-model",
    LR=1e-3,
)


# Locations of the dataset loading script, the raw archive and the cache,
# all resolved against the project root.
dataset_config = dict(
    LOADING_SCRIPT_FILES=os.path.join(PROJECT_ROOT, "src/data/crema.py"),
    CONFIG_NAME="clean",
    DATA_DIR=os.path.join(PROJECT_ROOT, "data/archive.zip"),
    CACHE_DIR=os.path.join(PROJECT_ROOT, "cache_crema"),
)


# Build the dataset from the local loading script and data archive.
ds = load_dataset(
    path=dataset_config["LOADING_SCRIPT_FILES"],
    name=dataset_config["CONFIG_NAME"],
    data_dir=dataset_config["DATA_DIR"],
    cache_dir=dataset_config["CACHE_DIR"],
)


# CONVERTING RAW AUDIO TO ARRAYS
def _decode_audio(example):
    """Load the example's audio file at 16 kHz and return it under "array"."""
    waveform, _ = librosa.load(example["file"], sr=16000, mono=False)
    return {"array": waveform}


ds = ds.map(_decode_audio, num_proc=2)


# LABEL TO ID
ds = ds.class_encode_column("label")


# ds["train"] = ds["train"].select(range(2500))
train_size = len(ds["train"])
wandb.log({"dataset_size": train_size})


# APPLY THE DATA PREP USING FEATURE EXTRACTOR TO ALL EXAMPLES
# (single process; pass num_proc=4 to ds.map to parallelise)
prep_kwargs = dict(feature_extractor=feature_extractor)
ds = ds.map(prepare_dataset, fn_kwargs=prep_kwargs)
logging.info("Finished extracting features from audio arrays.")


# INTRODUCE TRAIN TEST VAL SPLITS

# First cut: 90% stays for training, 10% is held out.
train_testvalid = ds["train"].train_test_split(test_size=0.1, shuffle=True)
# Second cut: the held-out 10% is halved into validation and test.
test_valid = train_testvalid["test"].train_test_split(test_size=0.5)
# Collect the three splits in a single DatasetDict.
ds = DatasetDict(
    train=train_testvalid["train"],
    test=test_valid["test"],
    val=test_valid["train"],
)


# DEFINE DATA COLLATOR - TO PAD TRAINING BATCHES DYNAMICALLY
collator_options = dict(processor=feature_extractor, padding=True)
data_collator = collator.DataCollatorCTCWithPadding(**collator_options)


# Fine-Tuning with Trainer
training_args = TrainingArguments(
    # output directory
    output_dir=os.path.join(PROJECT_ROOT, trainer_config["OUTPUT_DIR"]),
    # accumulate the gradients before running optimization step
    gradient_accumulation_steps=trainer_config["GRADIENT_ACCUMULATION_STEPS"],
    # total number of training epochs
    num_train_epochs=trainer_config["TRAIN_EPOCHS"],
    # batch size per device during training
    per_device_train_batch_size=trainer_config["TRAIN_BATCH_SIZE"],
    # batch size for evaluation
    per_device_eval_batch_size=trainer_config["EVAL_BATCH_SIZE"],
    # number of warmup steps for learning rate scheduler
    warmup_steps=trainer_config["WARMUP_STEPS"],
    # strength of weight decay
    weight_decay=trainer_config["DECAY"],
    logging_steps=trainer_config["LOGGING_STEPS"],
    # report metric at end of each epoch
    evaluation_strategy="epoch",
    # enable logging to W&B
    report_to="wandb",
    # default = 5e-5
    learning_rate=trainer_config["LR"],
)


def compute_metrics(eval_pred):
    """Compute classification accuracy for one evaluation pass.

    Args:
        eval_pred: Pair ``(logits, labels)``. The predicted class for each
            row is the argmax of ``logits`` over the last axis.

    Returns:
        ``{"accuracy": value}`` where ``value`` is the fraction of predictions
        equal to their label, as a Python float.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    # Accuracy is computed directly with NumPy: this avoids re-loading the
    # metric on every evaluation call and does not rely on the deprecated
    # `load_metric` API, while keeping the same result key.
    matches = predictions == np.asarray(labels)
    return {"accuracy": float(np.mean(matches))}


# START TRAINING
trainer = Trainer(
    model=hubert_model,  # the instantiated 🤗 Transformers model to be trained
    args=training_args,  # training arguments, defined above
    data_collator=data_collator,
    train_dataset=ds["train"],  # training dataset
    eval_dataset=ds["val"],  # evaluation dataset
    compute_metrics=compute_metrics,
)


trainer.train()

# TO RESUME TRAINING FROM CHECKPOINT
# trainer.train("results/checkpoint-2000")

# VALIDATION SET RESULTS
logging.info("Eval Set Result: {}".format(trainer.evaluate()))

# TEST RESULTS
test_results = trainer.predict(ds["test"])
logging.info("Test Set Result: {}".format(test_results.metrics))
wandb.log({"test_accuracy": test_results.metrics["test_accuracy"]})

# Absolute location of the saved model, shared by save_model and wandb.save.
model_dir = os.path.join(PROJECT_ROOT, trainer_config["MODEL_DIR"])
trainer.save_model(model_dir)

# logging trained models to wandb
# base_path is anchored to the project root like the glob itself; a base path
# relative to the current directory only matches the absolute glob when the
# script is launched from the project root.
wandb.save(
    os.path.join(model_dir, "*"),
    base_path=os.path.dirname(model_dir),
    policy="end",
)