# -*- coding: utf-8 -*-
# Copyright 2020 Minh Nguyen (@dathudeptrai)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Outlier detection and removal."""

import numpy as np


def is_outlier(x, p25, p75):
    """Check whether a value lies outside the 1.5 * IQR fences.

    Args:
        x: Scalar or NumPy array of values to test.
        p25: Lower percentile value (not the percentile rank).
        p75: Upper percentile value (not the percentile rank).

    Returns:
        A boolean for scalar input, or a boolean array of the same shape as
        ``x`` for array input; True where the value is strictly below
        ``p25 - 1.5 * (p75 - p25)`` or strictly above
        ``p75 + 1.5 * (p75 - p25)``.
    """
    iqr = p75 - p25
    lower = p25 - 1.5 * iqr
    upper = p75 + 1.5 * iqr
    # Strict comparisons: a value sitting exactly on a fence is not an
    # outlier. With inclusive comparisons, data whose IQR is zero (e.g. a
    # constant signal) would be flagged as outliers in its entirety.
    # `|` instead of `or` so the same expression works element-wise on arrays.
    return (x < lower) | (x > upper)


def remove_outlier(x, p_bottom: int = 25, p_top: int = 75):
    """Replace outliers in x with the largest remaining value.

    Outliers are the values outside the 1.5 * IQR fences computed from the
    ``p_bottom`` and ``p_top`` percentiles of ``x`` (see ``is_outlier``).

    Args:
        x: Array of values. A NumPy array is modified in place; any other
            array-like is converted to a new array first.
        p_bottom: Percentile rank (0-100) used as the lower quartile.
        p_top: Percentile rank (0-100) used as the upper quartile.

    Returns:
        The array with outliers replaced. For NumPy array input this is the
        same object that was passed in.
    """
    # asarray returns ndarray input unchanged, so in-place semantics are kept.
    x = np.asarray(x)
    if x.size == 0:
        # Percentiles and max are undefined for empty input; nothing to do.
        return x

    low_value, high_value = np.percentile(x, [p_bottom, p_top])
    outlier_mask = is_outlier(x, low_value, high_value)

    # Zero the outliers first so an extreme high value cannot itself be
    # picked as the fill value, then fill with the maximum of what is left.
    # NOTE(review): because of the zeroing step, the fill value is 0 when
    # every remaining value is negative - confirm inputs are non-negative.
    x[outlier_mask] = 0.0
    x[outlier_mask] = np.max(x)
    return x