sentry-ml-api / src /features.py
Testys's picture
First Layer for ML Model
01ca3ba
# src/features.py
import pandas as pd
import numpy as np
from typing import List
from src.utils import logger
def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Engineer per-node time-series features from raw metrics.

    Rows are ordered by ``node`` and then ``timestamp``, and four columns are
    added, each computed within a single node's history:

    * ``cpu_trend``: change in ``cpu_usage`` relative to the previous row.
    * ``cpu_rolling_mean``: mean of ``cpu_usage`` over the last 5 rows
      (fewer rows are used at the start of a node's history).
    * ``error_rate_lag1``: the previous row's ``rpc_error_rate``.
    * ``latency_rolling_std``: standard deviation of ``rpc_latency_ms`` over
      the last 5 rows; undefined until a node has 5 rows.

    Every missing value left in the result (including any already present in
    the raw columns) is replaced with 0.

    Args:
        df (pd.DataFrame): Raw data containing the columns ``node``,
            ``timestamp``, ``cpu_usage``, ``rpc_error_rate`` and
            ``rpc_latency_ms``. The frame is not modified.

    Returns:
        pd.DataFrame: A new frame sorted by node and timestamp with the added
        feature columns. Original index labels are preserved.

    Raises:
        KeyError: If a required column is missing (logged, then re-raised).
        Exception: Any other failure is logged and re-raised so that callers
            never receive ``None`` in place of a DataFrame.
    """
    try:
        # assign() builds a new frame, so the caller's DataFrame stays untouched.
        df = df.assign(timestamp=pd.to_datetime(df["timestamp"]))
        df = df.sort_values(["node", "timestamp"])
        grouped = df.groupby("node")

        df["cpu_trend"] = grouped["cpu_usage"].diff()
        df["cpu_rolling_mean"] = grouped["cpu_usage"].transform(
            lambda x: x.rolling(window=5, min_periods=1).mean()
        )
        df["error_rate_lag1"] = grouped["rpc_error_rate"].shift(1)
        # No min_periods here: the std is NaN until 5 rows exist for the node,
        # and those NaNs become 0 in the fillna below.
        df["latency_rolling_std"] = grouped["rpc_latency_ms"].transform(
            lambda x: x.rolling(window=5).std()
        )

        # The first row(s) of every node have no history; represent that as 0.
        return df.fillna(0)
    except KeyError as e:
        logger.error(f"Missing Column in Data: {e}")
        raise
    except Exception as e:
        logger.error(f"Error engineering features: {e}")
        # Re-raise: swallowing the error would make this function return None.
        raise
def main(
    input_path: str = "data/raw/synthetic_rpc_metrics_realistic.csv",
    output_path: str = "data/processed/engineered_metrics.csv",
) -> None:
    """
    Run the feature-engineering pipeline from one CSV file to another.

    Reads the raw metrics CSV, passes it through ``engineer_features`` and
    writes the result (without the index column) to ``output_path``. Any
    exception raised along the way is logged and not propagated.

    Args:
        input_path(str): Path to raw data CSV
        output_path(str): Path to save engineered features CSV
    """
    try:
        raw_metrics = pd.read_csv(input_path)
        engineer_features(raw_metrics).to_csv(output_path, index=False)
        logger.info(f"Engineered features saved to {output_path}")
    except Exception as e:
        logger.error(f"Error in main function: {e}")
# Run the pipeline with main()'s default paths when executed as a script.
if __name__ == "__main__":
    main()