Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import numpy as np | |
| from sklearn.preprocessing import FunctionTransformer, SplineTransformer | |
| from sklearn.base import BaseEstimator, TransformerMixin | |
| from sklearn.utils.validation import check_is_fitted | |
class GroupImputer(BaseEstimator, TransformerMixin):
    """Impute missing values of one DataFrame column using group-wise statistics.

    During ``fit`` the chosen statistic of ``target`` is computed for every
    combination of values in ``group_cols``.  During ``transform`` each
    missing ``target`` value is replaced by the statistic learned for the
    row's group.  Rows whose group was not seen during ``fit`` (or whose
    group had no usable statistic) are left missing.

    Parameters
    ----------
    target : str
        Name of the column to be imputed.
    group_cols : list
        Name(s) of the column(s) to group by.  The ``None`` default is
        rejected; a list must be supplied.
    strategy : str, default='median'
        Statistic used as the replacement value; one of
        ``'mean'``, ``'median'`` or ``'mode'``.

    Attributes
    ----------
    impute_map_ : pd.DataFrame
        One row per group seen during ``fit``: the ``group_cols`` columns
        plus the fill value in the ``target`` column.

    Raises
    ------
    ValueError
        If ``strategy`` is not one of the supported values.
    TypeError
        If ``group_cols`` is not a list or ``target`` is not a string.
    """

    _STRATEGIES = ("mean", "median", "mode")

    def __init__(self, target, group_cols=None, strategy="median"):
        # Explicit raises instead of ``assert`` so the checks survive ``python -O``.
        if strategy not in self._STRATEGIES:
            raise ValueError("strategy must be in ['mean', 'median', 'mode']")
        if not isinstance(group_cols, list):
            raise TypeError("group_cols must be a list of column names")
        if not isinstance(target, str):
            raise TypeError("target must be a string")
        self.group_cols = group_cols
        self.strategy = strategy
        self.target = target

    @staticmethod
    def _most_frequent(values):
        """Return the most frequent non-missing value, or NaN if there is none."""
        # dropna=True: a missing value must never be chosen as the fill value,
        # otherwise imputation for that group would silently do nothing.
        modes = values.mode(dropna=True)
        return modes.iloc[0] if len(modes) else np.nan

    def fit(self, X, y=None):
        """Learn the per-group fill value of ``target`` from ``X``.

        Parameters
        ----------
        X : pd.DataFrame
            Data containing ``target`` and every column in ``group_cols``.
        y : ignored
            Present for scikit-learn API compatibility.

        Returns
        -------
        self : GroupImputer
            The fitted imputer.
        """
        grouped = X.groupby(self.group_cols)[self.target]
        if self.strategy == "mode":
            stats = grouped.agg(self._most_frequent)
        else:
            stats = grouped.agg(self.strategy)
        # Turn the group keys back into ordinary columns so they can be
        # joined against new data in ``transform``.
        self.impute_map_ = stats.reset_index(drop=False)
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with missing ``target`` values filled in.

        Parameters
        ----------
        X : pd.DataFrame
            Data containing ``target`` and every column in ``group_cols``.
        y : ignored
            Present for scikit-learn API compatibility.

        Returns
        -------
        X : pd.DataFrame
            Copy of the input with imputed values in the target column.
        """
        check_is_fitted(self, "impute_map_")
        X = X.copy()
        missing = X[self.target].isna().to_numpy()
        if not missing.any():
            return X
        # A single left join looks up every row's group statistic at once
        # instead of scanning the whole frame once per group.  The index is
        # dropped so the result aligns positionally with X and cannot clash
        # with an index level that shares a name with a group column.
        keys = X[self.group_cols].reset_index(drop=True)
        looked_up = keys.merge(
            self.impute_map_,
            on=self.group_cols,
            how="left",
            validate="many_to_one",
        )
        fill_values = looked_up[self.target].to_numpy()
        X.loc[missing, self.target] = fill_values[missing]
        return X
| # Sine and cosine transformations | |
def sin_feature_names(transformer, feature_names):
    """Return the output feature names for a sine transformation.

    Each input name is prefixed with ``SIN_``.  ``transformer`` is accepted
    only to satisfy the callable signature and is not used.
    """
    return list(map("SIN_{}".format, feature_names))
def cos_feature_names(transformer, feature_names):
    """Return the output feature names for a cosine transformation.

    Each input name is prefixed with ``COS_``.  ``transformer`` is accepted
    only to satisfy the callable signature and is not used.
    """
    return list(map("COS_{}".format, feature_names))
def _sin_cycle(x, period):
    """Return the sine of ``x`` expressed as a phase within ``period``."""
    return np.sin(2 * np.pi * x / period)


def sin_transformer(period):
    """Build a transformer that encodes a cyclic feature by its sine.

    Parameters
    ----------
    period : number
        Length of one full cycle, in the same units as the feature.

    Returns
    -------
    FunctionTransformer
        Transformer computing ``sin(2 * pi * x / period)`` whose output
        feature names are produced by ``sin_feature_names``.
    """
    # A module-level function plus ``kw_args`` is used instead of a lambda
    # closing over ``period``: lambdas cannot be pickled, which would prevent
    # persisting any pipeline that contains this transformer.
    return FunctionTransformer(
        _sin_cycle,
        kw_args={"period": period},
        feature_names_out=sin_feature_names,
    )
def _cos_cycle(x, period):
    """Return the cosine of ``x`` expressed as a phase within ``period``."""
    return np.cos(2 * np.pi * x / period)


def cos_transformer(period):
    """Build a transformer that encodes a cyclic feature by its cosine.

    Parameters
    ----------
    period : number
        Length of one full cycle, in the same units as the feature.

    Returns
    -------
    FunctionTransformer
        Transformer computing ``cos(2 * pi * x / period)`` whose output
        feature names are produced by ``cos_feature_names``.
    """
    # A module-level function plus ``kw_args`` is used instead of a lambda
    # closing over ``period``: lambdas cannot be pickled, which would prevent
    # persisting any pipeline that contains this transformer.
    return FunctionTransformer(
        _cos_cycle,
        kw_args={"period": period},
        feature_names_out=cos_feature_names,
    )
| # Periodic spline transformation | |
def periodic_spline_transformer(period, n_splines=None, degree=3):
    """Build a periodic B-spline transformer spanning one cycle of a feature.

    Parameters
    ----------
    period : number
        Length of one full cycle; knots are spread evenly over ``[0, period]``.
    n_splines : int, optional
        Number of splines to generate.  Defaults to ``int(period)``.
    degree : int, default=3
        Polynomial degree of the spline basis.

    Returns
    -------
    SplineTransformer
        Transformer configured with periodic extrapolation, explicit evenly
        spaced knots and ``include_bias=True``.
    """
    if n_splines is None:
        # Coerce to int: a float period (e.g. 24.0) would otherwise produce a
        # float knot count, which np.linspace does not accept as ``num``.
        n_splines = int(period)
    n_knots = n_splines + 1  # periodic and include_bias is True
    knots = np.linspace(0, period, n_knots).reshape(n_knots, 1)
    return SplineTransformer(
        degree=degree,
        n_knots=n_knots,
        knots=knots,
        extrapolation="periodic",
        include_bias=True,
    )