#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2021/3/9 16:33
# @Author : dongchao yang
# @File : train.py
# Standard library
import collections
import sys
from pprint import pformat

# Third-party
import numpy as np
import pandas as pd
import scipy
import scipy.ndimage  # explicit: a bare "import scipy" may not load the submodule
import six
import sklearn.preprocessing as pre
import torch
import tqdm
import yaml
from loguru import logger
from scipy.interpolate import interp1d
def parse_config_or_kwargs(config_file, **kwargs):
    """Read a yaml parameter file and overlay keyword overrides.

    :param config_file: Config file that has parameters, yaml format
    :param **kwargs: Other alternative parameters or overwrites for config
    :returns: merged dict; kwargs win on key collisions
    """
    with open(config_file) as handle:
        file_config = yaml.load(handle, Loader=yaml.FullLoader)
    # Keyword arguments take precedence over values read from the file.
    return dict(file_config, **kwargs)
def find_contiguous_regions(activity_array):
    """Find contiguous regions from a bool-valued numpy.array.

    Adapted from dcase_util's DecisionEncoder
    (https://dcase-repo.github.io/dcase_util/_modules/dcase_util/data/decisions.html#DecisionEncoder)
    because:
    1. This does not belong to a class necessarily
    2. Importing DecisionEncoder requires sndfile over some other
       imports, which causes problems on clusters

    :returns: (n_regions, 2) int array of [onset, offset) index pairs.
    """
    # XOR between neighbours flags every False->True / True->False transition.
    transitions = np.logical_xor(activity_array[1:], activity_array[:-1])
    change_indices = transitions.nonzero()[0] + 1
    if activity_array[0]:
        # Array starts active: the first region opens at index 0.
        change_indices = np.r_[0, change_indices]
    if activity_array[-1]:
        # Array ends active: the last region closes at the array length.
        change_indices = np.r_[change_indices, activity_array.size]
    # Alternating onsets/offsets pair up into two columns.
    return change_indices.reshape((-1, 2))
def split_train_cv(
        data_frame: pd.DataFrame,
        frac: float = 0.9,
        y=None,  # Only for stratified, computes necessary split
        **kwargs):
    """Split a dataframe into train and cross-validation partitions.

    :param data_frame: input data
    :type data_frame: pd.DataFrame
    :param frac: fraction of the data used for training
    :type frac: float
    :param y: label matrix, only used for mode='stratified'
    :returns: (train_data, cv_data) tuple of DataFrames
    """
    mode = kwargs.get('mode', None)
    if mode == 'urbansed':
        # Filenames look like DATA_-1, DATA_-2, ...; group them by base name
        # so all chunks of one file land on the same side of the split.
        base_name = data_frame['filename'].str.split('_').apply(
            lambda parts: '_'.join(parts[:-1]))
        data_frame.loc[:, 'id'] = data_frame.groupby(base_name).ngroup()
        shuffled_ids = np.random.permutation(data_frame['id'].nunique())
        n_train = int(frac * len(shuffled_ids))
        train_data = data_frame[data_frame['id'].isin(shuffled_ids[:n_train])]
        cv_data = data_frame[data_frame['id'].isin(shuffled_ids[n_train:])]
        del train_data['id']
        del cv_data['id']
    elif mode == 'stratified':
        # Iterative stratified sampling keeps the label distribution
        # similar in both partitions.
        from skmultilearn.model_selection import iterative_train_test_split
        index_train, _, index_cv, _ = iterative_train_test_split(
            data_frame.index.values.reshape(-1, 1), y, test_size=1. - frac)
        train_data = data_frame[data_frame.index.isin(index_train.squeeze())]
        cv_data = data_frame[data_frame.index.isin(index_cv.squeeze())]
    else:
        # Plain random row-wise split with a fixed seed.
        train_data = data_frame.sample(frac=frac, random_state=10)
        cv_data = data_frame[~data_frame.index.isin(train_data.index)]
    return train_data, cv_data
def pprint_dict(in_dict, outputfun=sys.stdout.write, formatter='yaml'):
    """Pretty-print a dict line by line through an output callable.

    :param in_dict: dict to print
    :param outputfun: function invoked once per formatted line, defaults to
        sys.stdout.write (note: no newline is appended to each line)
    :param formatter: 'yaml' (yaml.dump) or 'pretty' (pprint.pformat)
    :raises ValueError: for an unknown formatter; previously this fell
        through and crashed with an UnboundLocalError
    """
    if formatter == 'yaml':
        format_fun = yaml.dump
    elif formatter == 'pretty':
        format_fun = pformat
    else:
        raise ValueError("Unknown formatter: {}".format(formatter))
    for line in format_fun(in_dict).split('\n'):
        outputfun(line)
def getfile_outlogger(outputfile):
    """Configure the loguru logger with a stderr sink and, optionally, a file.

    :param outputfile: path of a log file; falsy to log to stderr only
    :returns: the configured loguru logger
    """
    fmt = "[<green>{time:YYYY-MM-DD HH:mm:ss}</green>] {message}"
    logger.configure(handlers=[{"sink": sys.stderr, "format": fmt}])
    if outputfile:
        # enqueue=True makes file writes multiprocess-safe.
        logger.add(outputfile, enqueue=True, format=fmt)
    return logger
# Fit a multi-label encoder on the raw labels (does not transform them).
def train_labelencoder(labels: pd.Series, sparse=True):
    """train_labelencoder

    Fits a MultiLabelBinarizer on raw labels and returns it.

    :param labels: pd.Series representing the raw labels e.g., Speech, Water
    :param sparse: whether the fitted encoder emits sparse output
    :returns: the fitted encoder
    :raises ValueError: when label entries are neither strings, numpy
        arrays, nor iterables
    """
    assert isinstance(labels, pd.Series), "Labels need to be series"
    # Positional access: labels[0] is a *label* lookup and breaks when the
    # index does not start at 0 (encode_labels below already uses .iloc[0]).
    instance = labels.iloc[0]
    if isinstance(instance, six.string_types):
        # In case of using non processed strings, e.g., Vaccum, Speech
        label_array = labels.str.split(',').values.tolist()
    elif isinstance(instance, np.ndarray):
        # Encoder does not like to see numpy arrays
        label_array = [lab.tolist() for lab in labels]
    elif isinstance(instance, collections.abc.Iterable):
        # collections.Iterable was removed in Python 3.10; use collections.abc
        label_array = labels
    else:
        # Previously fell through and crashed with an UnboundLocalError.
        raise ValueError("Unsupported label type: {}".format(type(instance)))
    encoder = pre.MultiLabelBinarizer(sparse_output=sparse)
    encoder.fit(label_array)
    return encoder
def encode_labels(labels: pd.Series, encoder=None, sparse=True):
    """encode_labels

    Encodes labels into a many-hot representation.

    :param labels: pd.Series representing the raw labels e.g., Speech, Water
    :param encoder (optional): Encoder already fitted; when None a new
        MultiLabelBinarizer is fitted on these labels first
    :param sparse: sparse output for a newly created encoder
    :returns: (encoded labels (many hot), the encoder)
    :raises ValueError: when label entries are neither strings, numpy
        arrays, nor iterables
    """
    assert isinstance(labels, pd.Series), "Labels need to be series"
    instance = labels.iloc[0]
    if isinstance(instance, six.string_types):
        # In case of using non processed strings, e.g., Vaccum, Speech
        label_array = labels.str.split(',').values.tolist()
    elif isinstance(instance, np.ndarray):
        # Encoder does not like to see numpy arrays
        label_array = [lab.tolist() for lab in labels]
    elif isinstance(instance, collections.abc.Iterable):
        # collections.Iterable was removed in Python 3.10; use collections.abc
        label_array = labels
    else:
        # Previously fell through and crashed with an UnboundLocalError.
        raise ValueError("Unsupported label type: {}".format(type(instance)))
    if not encoder:
        # No fitted encoder passed in: fit one on these labels first.
        encoder = pre.MultiLabelBinarizer(sparse_output=sparse)
        encoder.fit(label_array)
    labels_encoded = encoder.transform(label_array)  # strings -> many-hot
    return labels_encoded, encoder
def decode_with_timestamps(events, labels: np.array):
    """decode_with_timestamps

    Decodes the predicted label array into a list of
    [(Labelname, onset, offset), ...] tuples; applied row-wise for
    2-d input.

    :param events: event name(s); one name per row for 2-d labels
    :param labels: 1-d or 2-d binary activity array
    :type labels: np.array
    """
    if labels.ndim != 2:
        return _decode_with_timestamps(events, labels)
    n_rows = labels.shape[0]
    return [
        _decode_with_timestamps(events[row], labels[row])
        for row in range(n_rows)
    ]
def median_filter(x, window_size, threshold=0.5):
    """median_filter

    Binarizes ``x`` at ``threshold``, then median-filters along the
    time axis.

    :param x: input prediction array of shape (B, T, C), (B, T) or (T,).
        Input is a sequence of probabilities 0 <= x <= 1
    :param window_size: An integer to use
    :param threshold: Binary thresholding threshold
    :raises ValueError: for inputs with more than 3 dimensions
    """
    x = binarize(x, threshold=threshold)  # map probabilities to {0, 1}
    if x.ndim == 3:
        # (batch, time, class): smooth along the time axis only.
        size = (1, window_size, 1)
    elif x.ndim == 2 and x.shape[0] == 1:
        # Class-specific median filtering, e.g., Batch x Time [1, 501]
        size = (1, window_size)
    elif x.ndim == 2:
        # Standard class-independent pooling, e.g., Time x Class [501, 10]
        size = (window_size, 1)
    elif x.ndim == 1:
        # Single sequence; the original code left ``size`` unassigned here
        # and crashed with an UnboundLocalError.
        size = (window_size,)
    else:
        raise ValueError(
            "median_filter expects at most 3 dims, got shape {}".format(
                x.shape))
    # scipy.ndimage is imported explicitly at module level; a bare
    # ``import scipy`` does not guarantee the submodule is loaded.
    return scipy.ndimage.median_filter(x, size=size)
def _decode_with_timestamps(events, labels):
    """Return [(events, onset, offset), ...] for every contiguous active
    region of a 1-d binary label array."""
    return [
        (events, onset, offset)
        for onset, offset in find_contiguous_regions(labels)
    ]
def inverse_transform_labels(encoder, pred):
    """Map encoded predictions back to label tuples with the encoder.

    Applies the encoder per batch element when ``pred`` is 3-dimensional.

    :param encoder: fitted encoder exposing ``inverse_transform``
    :param pred: 2-d (time, class) or 3-d (batch, time, class) array
    """
    if pred.ndim != 3:
        return encoder.inverse_transform(pred)
    return [encoder.inverse_transform(batch) for batch in pred]
def binarize(pred, threshold=0.5):
    """Binarize predictions: entries strictly greater than ``threshold``
    become 1, everything else 0 — the same rule as
    sklearn.preprocessing.binarize, which the original called per
    (batch) slice.

    :param pred: array of probabilities, any dimensionality; the sklearn
        helper rejected 1-d input, this version accepts it.
    :param threshold: decision threshold (strict inequality).
    :returns: array of the same shape and dtype with {0, 1} entries.
    """
    pred = np.asarray(pred)
    # One vectorized comparison handles 1-d, 2-d and batched 3-d input,
    # avoiding sklearn's per-call 2-d validation.
    return (pred > threshold).astype(pred.dtype)
def double_threshold(x, high_thres, low_thres, n_connect=1):
    """double_threshold

    Hysteresis (double) thresholding for n-dim arrays: low-threshold
    regions survive only if they contain a high-threshold value.

    :param x: input array, 1-d (time), 2-d (time, dim) or 3-d (batch, time, dim)
    :param high_thres: high threshold value
    :param low_thres: Low threshold value
    :param n_connect: Distance of <= n clusters will be merged
    """
    assert x.ndim <= 3, "Whoops something went wrong with the input ({}), check if its <= 3 dims".format(
        x.shape)
    # The time axis sits at 1 for (batch, time, dim) input, else at 0.
    time_axis = 1 if x.ndim == 3 else 0
    return np.apply_along_axis(
        lambda seq: _double_threshold(
            seq, high_thres, low_thres, n_connect=n_connect),
        axis=time_axis,
        arr=x)
def _double_threshold(x, high_thres, low_thres, n_connect=1, return_arr=True):
    """_double_threshold

    Computes a double threshold over a 1-d input array: candidate
    regions above ``low_thres`` are kept only when they contain at
    least one value above ``high_thres`` (a boundary-aware rule).

    :param x: input array, needs to be 1d
    :param high_thres: High threshold over the array
    :param low_thres: Low threshold over the array
    :param n_connect: Postprocessing, maximal distance between clusters to connect
    :param return_arr: if True (default) return a zero-one array of the
        same size as x; otherwise return the filtered index pairs
    """
    assert x.ndim == 1, "Input needs to be 1d"
    # Indices that pass the high threshold "seed" the surviving regions.
    seed_idx = np.where(x > high_thres)[0]
    candidate_regions = find_contiguous_regions(x > low_thres)
    kept = [
        region for region in candidate_regions
        if ((region[0] <= seed_idx) & (seed_idx <= region[1])).any()
    ]
    # Merge surviving regions that are at most n_connect apart.
    kept = connect_(kept, n_connect)
    if not return_arr:
        return kept
    zero_one_arr = np.zeros_like(x, dtype=int)
    for onset, offset in kept:
        zero_one_arr[onset:offset] = 1
    return zero_one_arr
def connect_clusters(x, n=1):
    """Apply connect_clusters_ over the time axis (axis -2) for >=2-d
    input; 1-d input is handled directly."""
    if x.ndim == 1:
        return connect_clusters_(x, n)
    if x.ndim >= 2:
        return np.apply_along_axis(
            lambda row: connect_clusters_(row, n=n), -2, x)
def connect_clusters_(x, n=1):
    """connect_clusters_

    Connects clustered predictions (0,1) in x with range n.

    :param x: Input array. zero-one format
    :param n: Number of frames to skip until connection can be made
    """
    assert x.ndim == 1, "input needs to be 1d"
    merged_regions = connect_(find_contiguous_regions(x), n=n)
    zero_one_arr = np.zeros_like(x, dtype=int)
    for onset, offset in merged_regions:
        zero_one_arr[onset:offset] = 1
    return zero_one_arr
def connect_(pairs, n=1):
    """connect_

    Merges adjacent clusters whose gap is at most n.

    :param pairs: Clusters of iterateables e.g., [(1,5),(7,10)]
    :param n: distance between two clusters
    :returns: list of merged (start, end) tuples
    """
    if not pairs:
        return []
    merged = []
    cur_start, cur_end = pairs[0]
    for nxt_start, nxt_end in pairs[1:]:
        if nxt_start - cur_end <= n:
            # Close enough: absorb the next cluster into the current one.
            cur_end = nxt_end
        else:
            merged.append((cur_start, cur_end))
            cur_start, cur_end = nxt_start, nxt_end
    merged.append((cur_start, cur_end))
    return merged
def predictions_to_time(df, ratio):
    """Scale frame-indexed onset/offset columns by ``ratio`` (in place).

    :param df: DataFrame with ``onset`` and ``offset`` columns
    :param ratio: time units per frame
    :returns: the mutated DataFrame (for chaining)
    """
    df['onset'] = df['onset'] * ratio
    df['offset'] = df['offset'] * ratio
    return df
def upgrade_resolution(arr, scale):
    """Linearly up-sample ``arr`` along axis 0 by a factor of ``scale``.

    :param arr: np.array of shape (T, ...), e.g., frame-level probabilities
    :param scale: up-sampling factor; the output covers T * scale positions
    :returns: np.array of shape (T * scale, ...); values past the last
        frame are linearly extrapolated
    """
    # NOTE: removed a stray debug print of arr.shape that spammed stdout.
    frame_idx = np.arange(arr.shape[0])
    interpolator = interp1d(frame_idx, arr, kind='linear', axis=0,
                            fill_value='extrapolate')
    fine_idx = np.arange(0, arr.shape[0], 1. / scale)
    return interpolator(fine_idx)
# a = [0.1,0.2,0.3,0.8,0.4,0.1,0.3,0.9,0.4]
# a = np.array(a)
# b = a>0.2
# _double_threshold(a,0.7,0.2) |