Spaces:
Configuration error
Configuration error
| # src/feature.py | |
| import pandas as pd | |
| import numpy as np | |
| from typing import List | |
| from src.utils import logger | |
def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Engineer per-node time-series features from raw metrics.

    Rows are ordered by node and timestamp, then these columns are added,
    each computed only from rows belonging to the same node:

    * ``cpu_trend``: change in ``cpu_usage`` from the previous row.
    * ``cpu_rolling_mean``: mean of ``cpu_usage`` over the last 5 rows
      (fewer rows are used at the start of a node's series).
    * ``error_rate_lag1``: ``rpc_error_rate`` of the previous row.
    * ``latency_rolling_std``: standard deviation of ``rpc_latency_ms`` over
      the last 5 rows; undefined until the node has 5 rows.

    Every missing value in the result, in any column, is replaced with 0.

    Args:
        df (pd.DataFrame): Raw data from the system. Must contain the columns
            ``node``, ``timestamp``, ``cpu_usage``, ``rpc_error_rate`` and
            ``rpc_latency_ms``. The frame is not modified.
    Returns:
        pd.DataFrame: New frame, sorted by node and timestamp, with the
            added feature columns.
    Raises:
        KeyError: If a required column is missing.
        Exception: Any other failure is logged and re-raised.
    """
    try:
        # assign() builds a new frame, so the caller's DataFrame keeps its
        # original (unparsed) timestamp column.
        features = df.assign(timestamp=pd.to_datetime(df["timestamp"]))
        features = features.sort_values(["node", "timestamp"])
        per_node = features.groupby("node")

        features["cpu_trend"] = per_node["cpu_usage"].diff()
        features["cpu_rolling_mean"] = per_node["cpu_usage"].transform(
            lambda s: s.rolling(window=5, min_periods=1).mean()
        )
        features["error_rate_lag1"] = per_node["rpc_error_rate"].shift(1)
        # No min_periods here: the std stays NaN (then 0 after fillna) until
        # a node has a full 5-row window.
        features["latency_rolling_std"] = per_node["rpc_latency_ms"].transform(
            lambda s: s.rolling(window=5).std()
        )

        # The first row of each node has no predecessor, so diff/shift yield
        # NaN there; those and any other gaps become 0.
        return features.fillna(0)
    except KeyError as e:
        logger.error(f"Missing Column in Data: {e}")
        raise
    except Exception as e:
        logger.error(f"Error engineering features: {e}")
        # Re-raise so callers never receive None in place of a DataFrame.
        raise
def main(
    input_path: str = "data/raw/synthetic_rpc_metrics_realistic.csv",
    output_path: str = "data/processed/engineered_metrics.csv",
) -> None:
    """
    Run the feature-engineering pipeline from CSV to CSV.

    Reads the raw metrics, derives the engineered features and writes the
    result without the index column. Any failure along the way is logged
    rather than raised.

    Args:
        input_path (str): Path to the raw data CSV.
        output_path (str): Path where the engineered features CSV is saved.
    """
    try:
        raw_metrics = pd.read_csv(input_path)
        engineer_features(raw_metrics).to_csv(output_path, index=False)
        logger.info(f"Engineered features saved to {output_path}")
    except Exception as e:
        logger.error(f"Error in main function: {e}")
# Script entry point: run the pipeline with its default input/output paths.
if __name__ == "__main__":
    main()