supun9 committed
Commit b7f4dbe
1 Parent(s): c4e415d

Upload 5 files

Files changed (5)
  1. README.md +1 -12
  2. audio_train.py +236 -0
  3. collator.py +38 -0
  4. crema.py +73 -0
  5. requirements.txt +101 -0
README.md CHANGED
@@ -1,12 +1 @@
- ---
- title: Audio Sentiment Analysis
- emoji: 🏃
- colorFrom: indigo
- colorTo: red
- sdk: gradio
- sdk_version: 3.23.0
- app_file: app.py
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ Placeholder for zip data

audio_train.py ADDED
@@ -0,0 +1,236 @@
+ import os
+ import logging
+ import librosa
+
+ import wandb
+ import numpy as np
+
+ from datasets import DatasetDict, load_dataset, load_metric
+ from transformers import (
+     HubertForSequenceClassification,
+     PretrainedConfig,
+     Trainer,
+     TrainingArguments,
+     Wav2Vec2FeatureExtractor,
+ )
+ from utils import collator
+
+ logging.basicConfig(
+     format="%(asctime)s | %(levelname)s: %(message)s", level=logging.INFO
+ )
+
+ PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
+ NUM_LABELS = 6
+
+
+ USER = "XXXX"  # TODO: replace with your username
+ WANDB_PROJECT = "XXXXX"  # TODO: replace with your project name
+ wandb.init(entity=USER, project=WANDB_PROJECT)
+
+
+ # PROCESS THE DATASET TO THE FORMAT EXPECTED BY THE MODEL FOR TRAINING
+ PreTrainedFeatureExtractor = "SequenceFeatureExtractor"  # noqa: F821
+
+ INPUT_FIELD = "input_values"
+ LABEL_FIELD = "labels"
+
+
+ def prepare_dataset(batch, feature_extractor: PreTrainedFeatureExtractor):
+     audio_arr = batch["array"]
+     input = feature_extractor(
+         audio_arr, sampling_rate=16000, padding=True, return_tensors="pt"
+     )
+
+     batch[INPUT_FIELD] = input.input_values[0]
+     batch[LABEL_FIELD] = batch[
+         "label"
+     ]  # colname MUST be labels as Trainer will look for it by default
+
+     return batch
+
+
+ model_id = "facebook/hubert-base-ls960"
+ MODELS_DIR = os.path.join(PROJECT_ROOT, "models")
+
+ extractor_path = (
+     model_id
+     if len(os.listdir(MODELS_DIR)) == 0
+     else os.path.join(MODELS_DIR, "feature_extractor")
+ )
+ model_path = (
+     model_id
+     if len(os.listdir(MODELS_DIR)) == 0
+     else os.path.join(MODELS_DIR, "pretrained_model")
+ )
+
+ feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(extractor_path)
+
+ config = PretrainedConfig.from_pretrained(model_path, num_labels=NUM_LABELS)
+ hubert_model = HubertForSequenceClassification.from_pretrained(
+     model_path,
+     config=config,  # because we need to update num_labels as per our dataset
+     ignore_mismatched_sizes=True,  # to avoid classifier size mismatch from from_pretrained.
+ )
+
+
+ # FREEZE LAYERS
+
+ # freeze all layers to begin with
+ for param in hubert_model.parameters():
+     param.requires_grad = False
+
+ layers_freeze_num = 2
+ n_layers = (
+     4 + layers_freeze_num * 16
+ )  # 4 refers to projector and classifier's weights and biases; each encoder layer has 16 parameter tensors.
+ for name, param in list(hubert_model.named_parameters())[-n_layers:]:
+     param.requires_grad = True
+
+ # # freeze model weights for all layers except projector and classifier
+ # for name, param in hubert_model.named_parameters():
+ #     if any(ext in name for ext in ["projector", "classifier"]):
+ #         param.requires_grad = True
+
+
+ trainer_config = {
+     "OUTPUT_DIR": "results",
+     "TRAIN_EPOCHS": 5,
+     "TRAIN_BATCH_SIZE": 32,
+     "EVAL_BATCH_SIZE": 32,
+     "GRADIENT_ACCUMULATION_STEPS": 4,
+     "WARMUP_STEPS": 500,
+     "DECAY": 0.01,
+     "LOGGING_STEPS": 10,
+     "MODEL_DIR": "models/audio-model",
+     "LR": 1e-3,
+ }
+
+
+ dataset_config = {
+     "LOADING_SCRIPT_FILES": os.path.join(PROJECT_ROOT, "src/data/crema.py"),
+     "CONFIG_NAME": "clean",
+     "DATA_DIR": os.path.join(PROJECT_ROOT, "data/archive.zip"),
+     "CACHE_DIR": os.path.join(PROJECT_ROOT, "cache_crema"),
+ }
+
+
+ ds = load_dataset(
+     dataset_config["LOADING_SCRIPT_FILES"],
+     dataset_config["CONFIG_NAME"],
+     cache_dir=dataset_config["CACHE_DIR"],
+     data_dir=dataset_config["DATA_DIR"],
+ )
+
+
+ # CONVERTING RAW AUDIO TO ARRAYS
+ ds = ds.map(
+     lambda x: {"array": librosa.load(x["file"], sr=16000, mono=False)[0]},
+     num_proc=2,
+ )
+
+
+ # LABEL TO ID
+ ds = ds.class_encode_column("label")
+
+
+ # ds["train"] = ds["train"].select(range(2500))
+ wandb.log({"dataset_size": len(ds["train"])})
+
+
+ # APPLY THE DATA PREP USING FEATURE EXTRACTOR TO ALL EXAMPLES
+ ds = ds.map(
+     prepare_dataset,
+     fn_kwargs={"feature_extractor": feature_extractor},
+     # num_proc=4,
+ )
+ logging.info("Finished extracting features from audio arrays.")
+
+
+ # INTRODUCE TRAIN TEST VAL SPLITS
+
+ # 90% train, 10% test + validation
+ train_testvalid = ds["train"].train_test_split(shuffle=True, test_size=0.1)
+ # Split the 10% test + valid in half test, half valid
+ test_valid = train_testvalid["test"].train_test_split(test_size=0.5)
+ # gather everyone if you want to have a single DatasetDict
+ ds = DatasetDict(
+     {
+         "train": train_testvalid["train"],
+         "test": test_valid["test"],
+         "val": test_valid["train"],
+     }
+ )
+
+
+ # DEFINE DATA COLLATOR - TO PAD TRAINING BATCHES DYNAMICALLY
+ data_collator = collator.DataCollatorCTCWithPadding(
+     processor=feature_extractor, padding=True
+ )
+
+
+ # Fine-Tuning with Trainer
+ training_args = TrainingArguments(
+     output_dir=os.path.join(
+         PROJECT_ROOT, trainer_config["OUTPUT_DIR"]
+     ),  # output directory
+     gradient_accumulation_steps=trainer_config[
+         "GRADIENT_ACCUMULATION_STEPS"
+     ],  # accumulate the gradients before running optimization step
+     num_train_epochs=trainer_config["TRAIN_EPOCHS"],  # total number of training epochs
+     per_device_train_batch_size=trainer_config[
+         "TRAIN_BATCH_SIZE"
+     ],  # batch size per device during training
+     per_device_eval_batch_size=trainer_config[
+         "EVAL_BATCH_SIZE"
+     ],  # batch size for evaluation
+     warmup_steps=trainer_config[
+         "WARMUP_STEPS"
+     ],  # number of warmup steps for learning rate scheduler
+     weight_decay=trainer_config["DECAY"],  # strength of weight decay
+     logging_steps=trainer_config["LOGGING_STEPS"],
+     evaluation_strategy="epoch",  # report metric at end of each epoch
+     report_to="wandb",  # enable logging to W&B
+     learning_rate=trainer_config["LR"],  # default = 5e-5
+ )
+
+
+ def compute_metrics(eval_pred):
+     # DEFINE EVALUATION METRIC
+     compute_accuracy_metric = load_metric("accuracy")
+     logits, labels = eval_pred
+     predictions = np.argmax(logits, axis=-1)
+     return compute_accuracy_metric.compute(predictions=predictions, references=labels)
+
+
+ # START TRAINING
+ trainer = Trainer(
+     model=hubert_model,  # the instantiated 🤗 Transformers model to be trained
+     args=training_args,  # training arguments, defined above
+     data_collator=data_collator,
+     train_dataset=ds["train"],  # training dataset
+     eval_dataset=ds["val"],  # evaluation dataset
+     compute_metrics=compute_metrics,
+ )
+
+
+ trainer.train()
+
+ # TO RESUME TRAINING FROM CHECKPOINT
+ # trainer.train("results/checkpoint-2000")
+
+ # VALIDATION SET RESULTS
+ logging.info("Eval Set Result: {}".format(trainer.evaluate()))
+
+ # TEST RESULTS
+ test_results = trainer.predict(ds["test"])
+ logging.info("Test Set Result: {}".format(test_results.metrics))
+ wandb.log({"test_accuracy": test_results.metrics["test_accuracy"]})
+
+ trainer.save_model(os.path.join(PROJECT_ROOT, trainer_config["MODEL_DIR"]))
+
+ # logging trained models to wandb
+ wandb.save(
+     os.path.join(PROJECT_ROOT, trainer_config["MODEL_DIR"], "*"),
+     base_path=os.path.dirname(trainer_config["MODEL_DIR"]),
+     policy="end",
+ )

collator.py ADDED
@@ -0,0 +1,38 @@
+ from dataclasses import dataclass
+ from typing import Dict, List, Optional, Union
+
+ import torch
+ from transformers import Wav2Vec2Processor
+
+ INPUT_FIELD = "input_values"
+ LABEL_FIELD = "labels"
+
+
+ @dataclass
+ class DataCollatorCTCWithPadding:
+     processor: Wav2Vec2Processor
+     padding: Union[bool, str] = True
+     max_length: Optional[int] = None
+     max_length_labels: Optional[int] = None
+     pad_to_multiple_of: Optional[int] = None
+     pad_to_multiple_of_labels: Optional[int] = None
+
+     def __call__(
+         self, examples: List[Dict[str, Union[List[int], torch.Tensor]]]
+     ) -> Dict[str, torch.Tensor]:
+
+         input_features = [
+             {INPUT_FIELD: example[INPUT_FIELD]} for example in examples
+         ]  # example is basically row0, row1, etc...
+         labels = [example[LABEL_FIELD] for example in examples]
+
+         batch = self.processor.pad(
+             input_features,
+             padding=self.padding,
+             max_length=self.max_length,
+             pad_to_multiple_of=self.pad_to_multiple_of,
+             return_tensors="pt",
+         )
+         batch[LABEL_FIELD] = torch.tensor(labels)
+
+         return batch

crema.py ADDED
@@ -0,0 +1,73 @@
+ # Lint as: python3
+ """CREMA-D dataset."""
+
+ import os
+ from typing import Union
+
+ import datasets
+ import pandas as pd
+
+ _DESCRIPTION = """\
+ CREMA-D is a data set of 7,442 original clips from 91 actors.
+ These clips were from 48 male and 43 female actors between the ages of 20 and 74
+ coming from a variety of races and ethnicities (African American, Asian,
+ Caucasian, Hispanic, and Unspecified). Actors spoke from a selection of 12
+ sentences. The sentences were presented using one of six different emotions
+ (Anger, Disgust, Fear, Happy, Neutral, and Sad) and four different emotion
+ levels (Low, Medium, High, and Unspecified).
+ """
+
+ _HOMEPAGE = "https://github.com/CheyneyComputerScience/CREMA-D"
+
+ DATA_DIR = {"train": "AudioWAV"}
+
+
+ class Crema(datasets.GeneratorBasedBuilder):
+     """CREMA-D dataset."""
+
+     DEFAULT_WRITER_BATCH_SIZE = 256
+     BUILDER_CONFIGS = [datasets.BuilderConfig(name="clean", description="Train Set.")]
+
+     def _info(self):
+         return datasets.DatasetInfo(
+             description=_DESCRIPTION,
+             features=datasets.Features(
+                 {"file": datasets.Value("string"), "label": datasets.Value("string")}
+             ),
+             supervised_keys=("file", "label"),
+             homepage=_HOMEPAGE,
+         )
+
+     def _split_generators(
+         self, dl_manager: datasets.utils.download_manager.DownloadManager
+     ):
+         data_dir = dl_manager.extract(self.config.data_dir)
+         if self.config.name == "clean":
+             train_splits = [
+                 datasets.SplitGenerator(
+                     name="train", gen_kwargs={"files": data_dir, "name": "train"}
+                 )
+             ]
+
+         return train_splits
+
+     def _generate_examples(self, files: Union[str, os.PathLike], name: str):
+         """Generate examples from a CREMA-D unzipped directory."""
+         key = 0
+         examples = list()
+
+         audio_dir = os.path.join(files, DATA_DIR[name])
+
+         if not os.path.exists(audio_dir):
+             raise FileNotFoundError
+         else:
+             for file in os.listdir(audio_dir):
+                 res = dict()
+                 res["file"] = "{}".format(os.path.join(audio_dir, file))
+                 res["label"] = file.split("_")[-2]
+                 examples.append(res)
+
+         for example in examples:
+             yield key, {**example}
+             key += 1
+         examples = []

requirements.txt ADDED
@@ -0,0 +1,101 @@
+ aiohttp==3.8.1
+ aiosignal==1.2.0
+ appdirs==1.4.4
+ appnope==0.1.3
+ asttokens==2.0.8
+ async-timeout==4.0.2
+ attrs==22.1.0
+ audioread==3.0.0
+ backcall==0.2.0
+ black==22.6.0
+ certifi==2022.6.15
+ cffi==1.15.1
+ charset-normalizer==2.1.1
+ click==8.1.3
+ datasets==2.4.0
+ debugpy==1.6.3
+ decorator==5.1.1
+ dill==0.3.5.1
+ docker-pycreds==0.4.0
+ entrypoints==0.4
+ executing==0.10.0
+ filelock==3.8.0
+ flake8==5.0.4
+ frozenlist==1.3.1
+ fsspec==2022.7.1
+ gitdb==4.0.9
+ GitPython==3.1.27
+ huggingface-hub==0.8.1
+ idna==3.3
+ ipykernel==6.15.1
+ ipython==8.4.0
+ ipywidgets==8.0.1
+ jedi==0.18.1
+ joblib==1.1.0
+ jupyter-client==7.3.4
+ jupyter-core==4.11.1
+ jupyterlab-widgets==3.0.2
+ librosa==0.8.1
+ llvmlite==0.39.0
+ matplotlib-inline==0.1.6
+ mccabe==0.7.0
+ multidict==6.0.2
+ multiprocess==0.70.13
+ mypy-extensions==0.4.3
+ nest-asyncio==1.5.5
+ numba==0.56.0
+ numpy==1.22.0
+ packaging==21.3
+ pandas==1.4.3
+ parso==0.8.3
+ pathspec==0.9.0
+ pathtools==0.1.2
+ pexpect==4.8.0
+ pickleshare==0.7.5
+ platformdirs==2.5.2
+ pooch==1.6.0
+ promise==2.3
+ prompt-toolkit==3.0.30
+ protobuf==3.20.1
+ psutil==5.9.1
+ ptyprocess==0.7.0
+ pure-eval==0.2.2
+ pyarrow==9.0.0
+ pycodestyle==2.9.1
+ pycparser==2.21
+ pyflakes==2.5.0
+ Pygments==2.13.0
+ pyparsing==3.0.9
+ python-dateutil==2.8.2
+ pytz==2022.2.1
+ PyYAML==6.0
+ pyzmq==23.2.1
+ regex==2022.8.17
+ requests==2.28.1
+ resampy==0.4.0
+ responses==0.18.0
+ scikit-learn==1.1.2
+ scipy==1.9.0
+ sentry-sdk==1.9.5
+ setproctitle==1.3.2
+ shortuuid==1.0.9
+ six==1.16.0
+ sklearn==0.0
+ smmap==5.0.0
+ SoundFile==0.10.3.post1
+ stack-data==0.4.0
+ threadpoolctl==3.1.0
+ tokenizers==0.12.1
+ tomli==2.0.1
+ torch==1.12.1
+ tornado==6.2
+ tqdm==4.64.0
+ traitlets==5.3.0
+ transformers==4.21.2
+ typing_extensions==4.3.0
+ urllib3==1.26.12
+ wandb==0.13.2
+ wcwidth==0.2.5
+ widgetsnbextension==4.0.2
+ xxhash==3.0.0
+ yarl==1.8.1