| """ | |
| The setting of Superb ER | |
| Authors | |
| * Tzu-Hsien Huang 2021 | |
| * Leo 2021 | |
| * Leo 2022 | |
| """ | |
| import json | |
| import logging | |
| from pathlib import Path | |
| from typing import List | |
| import pandas as pd | |
| import torch | |
| from omegaconf import MISSING | |
| from torch.utils.data import random_split | |
| from s3prl.dataio.corpus.iemocap import IEMOCAP | |
| from s3prl.util.download import download | |
| from .superb_sid import SuperbSID | |
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Public names exported by a star-import of this module.
__all__ = [
    "iemocap_for_superb",
    "SuperbER",
]
def iemocap_for_superb(
    target_dir: str,
    cache_dir: str,
    iemocap: str,
    test_fold: int,
    valid_ratio: float = 0.2,
    get_path_only: bool = False,
):
    """
    Prepare IEMOCAP for emotion classification with SUPERB protocol,
    following :obj:`SuperbER.prepare_data` format.

    .. note::

        In SUPERB protocol, you need to do 5-fold cross validation.

        Also, only use 4 emotion classes: :code:`happy`, :code:`angry`,
        :code:`neutral`, and :code:`sad` with balanced data points and
        the :code:`excited` class is merged into :code:`happy` class.

    Args:
        target_dir (str): Directory receiving :code:`train.csv`, :code:`valid.csv`
            and :code:`test.csv`. It is created if it does not exist.
        cache_dir (str): Directory in which the split metadata JSON files are
            stored after being fetched with :code:`download(..., refresh=False)`.
        iemocap (str): The root path of the IEMOCAP
        test_fold (int): Which fold to use as the test fold, select from 0 to 4
        valid_ratio (float): given the remaining 4 folds, how many data to use as the validation set
        get_path_only (bool): Return the csv paths right away, without parsing
            the corpus, downloading metadata or writing any file.

    Returns:
        tuple

        1. train_path (:obj:`pathlib.Path`)
        2. valid_path (:obj:`pathlib.Path`)
        3. test_paths (List[:obj:`pathlib.Path`])

        Each csv has the columns :code:`label`, :code:`wav_path` and :code:`id`,
        with rows sorted by :code:`id`.

    Raises:
        ValueError: if :code:`test_fold` is not within 0 to 4 (only checked
            when :code:`get_path_only` is False).
    """
    target_dir = Path(target_dir)
    train_path = target_dir / "train.csv"
    valid_path = target_dir / "valid.csv"
    test_paths = [target_dir / "test.csv"]

    if get_path_only:
        return train_path, valid_path, test_paths

    # Fail early with a clear message instead of a failed download of a
    # non-existent "Session{N}" split file.
    if not 0 <= test_fold <= 4:
        raise ValueError(
            f"test_fold must be an integer from 0 to 4, but got {test_fold!r}"
        )

    corpus = IEMOCAP(iemocap)
    all_datapoints = corpus.all_data

    # "exc" is kept here because it is merged into "hap" below.
    kept_emotions = ("neu", "hap", "ang", "sad", "exc")

    def select_records(data_ids):
        """Keep the utterances of the retained classes as {id: {wav_path, label}}."""
        records = dict()
        for data_id in data_ids:
            data_point = all_datapoints[data_id]
            emotion = data_point["emotion"]
            if emotion not in kept_emotions:
                continue
            # Build a fresh record so the corpus entries are not modified
            # when "exc" is relabelled as "hap".
            records[data_id] = dict(
                wav_path=data_point["wav_path"],
                label="hap" if emotion == "exc" else emotion,
            )
        return records

    def write_csv(records, csv_path):
        """Write the records sorted by id; an empty split yields a header-only csv."""
        ids = sorted(records)
        table = {
            "label": [records[data_id]["label"] for data_id in ids],
            "wav_path": [records[data_id]["wav_path"] for data_id in ids],
            "id": ids,
        }
        pd.DataFrame(table).to_csv(csv_path, index=False)

    # The split files are organised per session, numbered from 1.
    test_session_id = test_fold + 1
    train_meta_data_json = (
        Path(cache_dir) / f"test_session{test_session_id}_train_metadata.json"
    )
    test_meta_data_json = (
        Path(cache_dir) / f"test_session{test_session_id}_test_metadata.json"
    )
    download(
        train_meta_data_json,
        f"https://huggingface.co/datasets/s3prl/iemocap_split/raw/4097f2b496c41eed016d4e5eb0ada4cccd46d1f3/Session{test_session_id}/train_meta_data.json",
        refresh=False,
    )
    download(
        test_meta_data_json,
        f"https://huggingface.co/datasets/s3prl/iemocap_split/raw/4097f2b496c41eed016d4e5eb0ada4cccd46d1f3/Session{test_session_id}/test_meta_data.json",
        refresh=False,
    )

    def read_ids(metadata_json):
        """Return the utterance ids (file stems) listed in a split metadata file."""
        with open(metadata_json, encoding="utf-8") as f:
            metadata = json.load(f)["meta_data"]
        return [Path(item["path"]).stem for item in metadata]

    dev_ids = read_ids(train_meta_data_json)
    test_ids = read_ids(test_meta_data_json)

    train_len = int((1 - valid_ratio) * len(dev_ids))
    train_valid_lens = [train_len, len(dev_ids) - train_len]

    # Seed the global torch RNG right before splitting so the train/valid
    # partition is identical on every call.
    torch.manual_seed(0)
    train_ids, valid_ids = random_split(dev_ids, train_valid_lens)

    train_data = select_records(train_ids)
    valid_data = select_records(valid_ids)
    test_data = select_records(test_ids)

    target_dir.mkdir(parents=True, exist_ok=True)
    write_csv(train_data, train_path)
    write_csv(valid_data, valid_path)
    write_csv(test_data, test_paths[0])

    return train_path, valid_path, test_paths
class SuperbER(SuperbSID):
    """
    The Superb ER setting, derived from :obj:`SuperbSID`.

    Data preparation is delegated to :obj:`iemocap_for_superb`.
    """

    def default_config(self) -> dict:
        """
        Return the default configuration of this setting as a nested dict.

        Entries set to :code:`MISSING` have no default value here.
        """
        batch_sampler_conf = {
            "train": {"batch_size": 4, "shuffle": True},
            "valid": {"batch_size": 4},
            "test": {"batch_size": 4},
        }
        optimizer_conf = {
            "name": "Adam",
            "conf": {"lr": 1.0e-4},
        }
        scheduler_conf = {
            "name": "ExponentialLR",
            "gamma": 0.9,
        }
        train_conf = {
            "total_steps": 30000,
            "log_step": 500,
            "eval_step": 1000,
            "save_step": 1000,
            "gradient_clipping": 1.0,
            "gradient_accumulate": 8,
            "valid_metric": "accuracy",
            "valid_higher_better": True,
            "auto_resume": True,
            "resume_ckpt_dir": None,
        }
        return {
            "start": 0,
            "stop": None,
            "target_dir": MISSING,
            "cache_dir": None,
            "remove_all_cache": False,
            "prepare_data": {
                "iemocap": MISSING,
                "test_fold": MISSING,
            },
            "build_encoder": {},
            "build_dataset": {},
            "build_batch_sampler": batch_sampler_conf,
            "build_upstream": {"name": MISSING},
            "build_featurizer": {
                "layer_selections": None,
                "normalize": False,
            },
            "build_downstream": {"hidden_size": 256},
            "build_model": {"upstream_trainable": False},
            "build_task": {},
            "build_optimizer": optimizer_conf,
            "build_scheduler": scheduler_conf,
            "save_model": {},
            "save_task": {},
            "train": train_conf,
            "evaluate": {},
        }

    def prepare_data(
        self,
        prepare_data: dict,
        target_dir: str,
        cache_dir: str,
        get_path_only: bool = False,
    ):
        """
        Prepare the task-specific data metadata (path, labels...).
        By default call :obj:`iemocap_for_superb` with :code:`**prepare_data`

        Args:
            prepare_data (dict): same in :obj:`default_config`,
                support arguments in :obj:`iemocap_for_superb`
            target_dir (str): Parse your corpus and save the csv file into this directory
            cache_dir (str): If the parsing or preprocessing takes too long time, you can save
                the temporary files into this directory. This directory is expected to be shared
                across different training sessions (different hypers and :code:`target_dir`)
            get_path_only (bool): Directly return the filepaths no matter they exist or not.

        Returns:
            tuple

            1. train_path
            2. valid_path
            3. test_paths (list)

            Each path should be a csv file containing the following columns:

            ====================  ====================
            column                description
            ====================  ====================
            id                    (str) - the unique id for this data point
            wav_path              (str) - the absolute path of the waveform file
            label                 (str) - a string label of the waveform
            start_sec             (float) - optional, load the waveform from :code:`start_sec` seconds. If not presented or is :code:`math.nan`, load from the beginning.
            end_sec               (float) - optional, load the waveform from :code:`end_sec` seconds. If not presented or is :code:`math.nan`, load to the end.
            ====================  ====================
        """
        # NOTE(review): `_get_current_arguments` is defined outside this file;
        # presumably it collects this method's own arguments and flattens the
        # `prepare_data` dict into them. Keep the call directly in this method
        # body; confirm against the base class before moving it.
        forwarded_kwargs = self._get_current_arguments(flatten_dict="prepare_data")
        return iemocap_for_superb(**forwarded_kwargs)