Spaces:
Sleeping
Sleeping
File size: 8,999 Bytes
9f41dd5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
"""Module for training a custom classifier.
Can be used to train a custom classifier with new training data.
"""
import argparse
import os
import numpy as np
import audio
import config as cfg
import model
import utils
def _loadTrainingData(cache_mode="none", cache_file=""):
    """Loads the data for training.

    Reads all subdirectories of "config.TRAIN_DATA_PATH" and uses their names as new labels.
    These directories should contain all the training data for each label.

    If a cache file is provided, the training data is loaded from there.

    Args:
        cache_mode: Cache mode. Can be 'none', 'load' or 'save'. Defaults to 'none'.
        cache_file: Path to cache file.

    Returns:
        A tuple of (x_train, y_train, labels).
    """
    # Load from cache instead of re-extracting embeddings
    if cache_mode == "load":
        if os.path.isfile(cache_file):
            print(f"\t...loading from cache: {cache_file}", flush=True)
            x_train, y_train, labels = utils.loadFromCache(cache_file)
            return x_train, y_train, labels
        else:
            # Fall through to a regular load if the cache file is missing
            print(f"\t...cache file not found: {cache_file}", flush=True)

    # Get list of subfolders as labels
    labels = list(sorted(utils.list_subdirectories(cfg.TRAIN_DATA_PATH)))

    # Get valid labels (non-event classes, e.g. noise/background, get no output unit)
    valid_labels = [l for l in labels if not l.lower() in cfg.NON_EVENT_CLASSES]

    # Load training data
    x_train = []
    y_train = []

    for label in labels:
        # Current label
        print(f"\t- {label}", flush=True)

        # Get label vector: all-zero for non-event classes and negative samples
        # (folders whose name starts with "-"), one-hot otherwise
        label_vector = np.zeros((len(valid_labels),), dtype="float32")
        if not label.lower() in cfg.NON_EVENT_CLASSES and not label.startswith("-"):
            label_vector[valid_labels.index(label)] = 1

        # Get list of files
        # Filter files that start with '.' because macOS seems to use them for temp files.
        files = filter(
            os.path.isfile,
            (
                os.path.join(cfg.TRAIN_DATA_PATH, label, f)
                for f in sorted(os.listdir(os.path.join(cfg.TRAIN_DATA_PATH, label)))
                if not f.startswith(".") and f.rsplit(".", 1)[-1].lower() in cfg.ALLOWED_FILETYPES
            ),
        )

        # Load files
        for f in files:
            # Load audio; in 'first' mode only the first SIG_LENGTH seconds are decoded
            sig, rate = audio.openAudioFile(f, duration=cfg.SIG_LENGTH if cfg.SAMPLE_CROP_MODE == "first" else None)

            # Crop training samples
            if cfg.SAMPLE_CROP_MODE == "center":
                sig_splits = [audio.cropCenter(sig, rate, cfg.SIG_LENGTH)]
            elif cfg.SAMPLE_CROP_MODE == "first":
                # Use [:1] instead of [0] so that a file too short for
                # splitSignal() (presumably an empty result list — confirm
                # against audio.splitSignal) is skipped instead of raising
                # an IndexError.
                sig_splits = audio.splitSignal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)[:1]
            else:
                sig_splits = audio.splitSignal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)

            # Get feature embeddings
            for sig in sig_splits:
                embeddings = model.embeddings([sig])[0]

                # Add to training data
                x_train.append(embeddings)
                y_train.append(label_vector)

    # Convert to numpy arrays
    x_train = np.array(x_train, dtype="float32")
    y_train = np.array(y_train, dtype="float32")

    # Remove non-event classes from labels so they match the label vector columns
    labels = [l for l in labels if not l.lower() in cfg.NON_EVENT_CLASSES]

    # Save to cache?
    if cache_mode == "save":
        print(f"\t...saving training data to cache: {cache_file}", flush=True)

        try:
            utils.saveToCache(cache_file, x_train, y_train, labels)
        except Exception as e:
            # Best-effort: a failed cache write should not abort training
            print(f"\t...error saving cache: {e}", flush=True)

    return x_train, y_train, labels
def trainModel(on_epoch_end=None):
    """Trains a custom classifier.

    Args:
        on_epoch_end: A callback function that takes two arguments `epoch`, `logs`.

    Returns:
        A keras `History` object, whose `history` property contains all the metrics.

    Raises:
        ValueError: If `cfg.TRAINED_MODEL_OUTPUT_FORMAT` is not one of
            'tflite', 'raven' or 'both'.
    """
    # Fail fast on an invalid output format so a typo in --model_format does
    # not surface only after a potentially long training run.
    if cfg.TRAINED_MODEL_OUTPUT_FORMAT not in ("tflite", "raven", "both"):
        raise ValueError(f"Unknown model output format: {cfg.TRAINED_MODEL_OUTPUT_FORMAT}")

    # Load training data
    print("Loading training data...", flush=True)
    x_train, y_train, labels = _loadTrainingData(cfg.TRAIN_CACHE_MODE, cfg.TRAIN_CACHE_FILE)
    print(f"...Done. Loaded {x_train.shape[0]} training samples and {y_train.shape[1]} labels.", flush=True)

    # Build model
    print("Building model...", flush=True)
    classifier = model.buildLinearClassifier(y_train.shape[1], x_train.shape[1], cfg.TRAIN_HIDDEN_UNITS, cfg.TRAIN_DROPOUT)
    print("...Done.", flush=True)

    # Train model
    print("Training model...", flush=True)
    classifier, history = model.trainLinearClassifier(
        classifier,
        x_train,
        y_train,
        epochs=cfg.TRAIN_EPOCHS,
        batch_size=cfg.TRAIN_BATCH_SIZE,
        learning_rate=cfg.TRAIN_LEARNING_RATE,
        val_split=cfg.TRAIN_VAL_SPLIT,
        upsampling_ratio=cfg.UPSAMPLING_RATIO,
        upsampling_mode=cfg.UPSAMPLING_MODE,
        train_with_mixup=cfg.TRAIN_WITH_MIXUP,
        train_with_label_smoothing=cfg.TRAIN_WITH_LABEL_SMOOTHING,
        on_epoch_end=on_epoch_end,
    )

    # Best validation AUPRC (at minimum validation loss)
    best_val_auprc = history.history["val_AUPRC"][np.argmin(history.history["val_loss"])]

    # Save in the requested format(s); 'both' saves raven first, then tflite,
    # matching the original order.
    if cfg.TRAINED_MODEL_OUTPUT_FORMAT in ("raven", "both"):
        model.save_raven_model(classifier, cfg.CUSTOM_CLASSIFIER, labels)
    if cfg.TRAINED_MODEL_OUTPUT_FORMAT in ("tflite", "both"):
        model.saveLinearClassifier(classifier, cfg.CUSTOM_CLASSIFIER, labels)

    print(f"...Done. Best AUPRC: {best_val_auprc}", flush=True)

    return history
if __name__ == "__main__":
    # Parse arguments
    parser = argparse.ArgumentParser(description="Train a custom classifier with BirdNET")
    parser.add_argument("--i", default="train_data/", help="Path to training data folder. Subfolder names are used as labels.")
    parser.add_argument("--crop_mode", default="center", help="Crop mode for training data. Can be 'center', 'first' or 'segments'. Defaults to 'center'.")
    parser.add_argument("--crop_overlap", type=float, default=0.0, help="Overlap of training data segments in seconds if crop_mode is 'segments'. Defaults to 0.")
    parser.add_argument(
        "--o", default="checkpoints/custom/Custom_Classifier", help="Path to trained classifier model output."
    )
    parser.add_argument("--epochs", type=int, default=100, help="Number of training epochs. Defaults to 100.")
    parser.add_argument("--batch_size", type=int, default=32, help="Batch size. Defaults to 32.")
    parser.add_argument("--val_split", type=float, default=0.2, help="Validation split ratio. Defaults to 0.2.")
    parser.add_argument("--learning_rate", type=float, default=0.01, help="Learning rate. Defaults to 0.01.")
    parser.add_argument(
        "--hidden_units",
        type=int,
        default=0,
        help="Number of hidden units. Defaults to 0. If set to >0, a two-layer classifier is used.",
    )
    parser.add_argument("--dropout", type=float, default=0.0, help="Dropout rate. Defaults to 0.")
    # default=False so that omitting the flag yields False rather than None
    # (BooleanOptionalAction has no implicit default).
    parser.add_argument("--mixup", action=argparse.BooleanOptionalAction, default=False, help="Whether to use mixup for training.")
    parser.add_argument("--upsampling_ratio", type=float, default=0.0, help="Balance train data and upsample minority classes. Values between 0 and 1. Defaults to 0.")
    parser.add_argument("--upsampling_mode", default="repeat", help="Upsampling mode. Can be 'repeat', 'mean' or 'smote'. Defaults to 'repeat'.")
    parser.add_argument("--model_format", default="tflite", help="Model output format. Can be 'tflite', 'raven' or 'both'. Defaults to 'tflite'.")
    parser.add_argument("--cache_mode", default="none", help="Cache mode. Can be 'none', 'load' or 'save'. Defaults to 'none'.")
    parser.add_argument("--cache_file", default="train_cache.npz", help="Path to cache file. Defaults to 'train_cache.npz'.")

    args = parser.parse_args()

    # Transfer parsed arguments into the global config module; the training
    # functions read all of their settings from cfg.
    cfg.TRAIN_DATA_PATH = args.i
    cfg.SAMPLE_CROP_MODE = args.crop_mode
    cfg.SIG_OVERLAP = args.crop_overlap
    cfg.CUSTOM_CLASSIFIER = args.o
    cfg.TRAIN_EPOCHS = args.epochs
    cfg.TRAIN_BATCH_SIZE = args.batch_size
    cfg.TRAIN_VAL_SPLIT = args.val_split
    cfg.TRAIN_LEARNING_RATE = args.learning_rate
    cfg.TRAIN_HIDDEN_UNITS = args.hidden_units
    # Clamp user-supplied values into their valid ranges
    cfg.TRAIN_DROPOUT = min(max(0, args.dropout), 0.9)
    cfg.TRAIN_WITH_MIXUP = args.mixup
    cfg.UPSAMPLING_RATIO = min(max(0, args.upsampling_ratio), 1)
    cfg.UPSAMPLING_MODE = args.upsampling_mode
    cfg.TRAINED_MODEL_OUTPUT_FORMAT = args.model_format
    cfg.TRAIN_CACHE_MODE = args.cache_mode.lower()
    cfg.TRAIN_CACHE_FILE = args.cache_file

    cfg.TFLITE_THREADS = 4  # Set this to 4 to speed things up a bit

    # Train model
    trainModel()
|