Spaces:
Sleeping
Sleeping
| from abc import ABC, abstractmethod | |
| import pandas as pd | |
| import numpy as np | |
| from src.core import setup_logger | |
| logger = setup_logger(__name__) | |
| class FeatureEngineeringStrategy(ABC): | |
| def apply_transformation(self, df: pd.DataFrame) -> pd.DataFrame: | |
| pass | |
| class DateTransformation(FeatureEngineeringStrategy): | |
| def apply_transformation(self, df: pd.DataFrame) -> pd.DataFrame: | |
| logger.info("Applying date transformation.") | |
| df_transformed = df.copy() | |
| date_col = "date" if "date" in df.columns else "Date" | |
| if date_col not in df.columns: | |
| return df | |
| df_transformed[date_col] = pd.to_datetime(df[date_col]) | |
| df_transformed["Year"] = df_transformed[date_col].dt.year | |
| df_transformed["Month"] = df_transformed[date_col].dt.month | |
| df_transformed["Day"] = df_transformed[date_col].dt.day | |
| df_transformed["DayOfWeek"] = df_transformed[date_col].dt.dayofweek + 1 | |
| df_transformed["IsWeekend"] = ( | |
| df_transformed[date_col].dt.dayofweek >= 5 | |
| ).astype(int) | |
| df_transformed["DayOfMonth"] = df_transformed[date_col].dt.day | |
| return df_transformed | |
| class FourierSeriesSeasonality(FeatureEngineeringStrategy): | |
| def __init__(self, period: float = 365.25, order: int = 3): | |
| self.period = period | |
| self.order = order | |
| def apply_transformation(self, df: pd.DataFrame) -> pd.DataFrame: | |
| logger.info(f"Applying Fourier terms (order={self.order})") | |
| df_transformed = df.copy() | |
| date_col = "date" if "date" in df.columns else "Date" | |
| times = ( | |
| pd.to_datetime(df_transformed[date_col]).values.view(np.int64) | |
| / 10**9 | |
| / (60 * 60 * 24) | |
| ) | |
| for i in range(1, self.order + 1): | |
| df_transformed[f"fourier_sin_{i}"] = np.sin( | |
| 2 * np.pi * i * times / self.period | |
| ) | |
| df_transformed[f"fourier_cos_{i}"] = np.cos( | |
| 2 * np.pi * i * times / self.period | |
| ) | |
| return df_transformed | |
| class EasterFeature(FeatureEngineeringStrategy): | |
| def apply_transformation(self, df: pd.DataFrame) -> pd.DataFrame: | |
| logger.info("Applying Easter feature.") | |
| df_transformed = df.copy() | |
| date_col = "date" if "date" in df.columns else "Date" | |
| dates = pd.to_datetime(df_transformed[date_col]) | |
| easter_dates = { | |
| 2013: "2013-03-31", | |
| 2014: "2014-04-20", | |
| 2015: "2015-04-05", | |
| 2016: "2016-03-27", | |
| } | |
| df_transformed["days_to_easter"] = 999 | |
| for year, date_str in easter_dates.items(): | |
| mask = dates.dt.year == year | |
| df_transformed.loc[mask, "days_to_easter"] = ( | |
| dates[mask] - pd.to_datetime(date_str) | |
| ).dt.days | |
| df_transformed["easter_effect"] = ( | |
| (df_transformed["days_to_easter"] >= -7) | |
| & (df_transformed["days_to_easter"] <= 7) | |
| ).astype(int) | |
| return df_transformed | |
| class RossmannFeatureEngineering(FeatureEngineeringStrategy): | |
| def apply_transformation(self, df: pd.DataFrame) -> pd.DataFrame: | |
| logger.info("Applying Rossmann retail features.") | |
| df_transformed = df.copy() | |
| if "StateHoliday" in df_transformed.columns: | |
| df_transformed["StateHoliday"] = ( | |
| df_transformed["StateHoliday"] | |
| .astype(str) | |
| .map({"0": 0, "a": 1, "b": 2, "c": 3}) | |
| .fillna(0) | |
| ) | |
| if "CompetitionDistance" in df_transformed.columns: | |
| df_transformed["CompetitionDistance"] = df_transformed[ | |
| "CompetitionDistance" | |
| ].fillna(100000) | |
| if ( | |
| "CompetitionOpenSinceYear" in df_transformed.columns | |
| and "Year" in df_transformed.columns | |
| ): | |
| df_transformed["CompetitionOpenTime"] = 12 * ( | |
| df_transformed["Year"] - df_transformed["CompetitionOpenSinceYear"] | |
| ) + (df_transformed["Month"] - df_transformed["CompetitionOpenSinceMonth"]) | |
| df_transformed["CompetitionOpenTime"] = df_transformed[ | |
| "CompetitionOpenTime" | |
| ].apply(lambda x: x if x > 0 else 0) | |
| return df_transformed | |
| class FeatureEngineer: | |
| def __init__(self, strategy: FeatureEngineeringStrategy): | |
| self._strategy = strategy | |
| def set_strategy(self, strategy: FeatureEngineeringStrategy): | |
| self._strategy = strategy | |
| def apply_feature_engineering(self, df: pd.DataFrame) -> pd.DataFrame: | |
| return self._strategy.apply_transformation(df) | |