import logging
from typing import Tuple

import pandas as pd
from typing_extensions import Annotated
from zenml import step

from src.data_preprocessing import DataCombiningBaseline, DataPreprocessing


@step
def preprocess_data(
    df_movie: pd.DataFrame,
    df_user: pd.DataFrame,
    df_rating: pd.DataFrame,
) -> Annotated[pd.DataFrame, "df_total"]:
    """Combine the three input frames into one preprocessed DataFrame.

    The work is delegated to ``DataPreprocessing`` configured with the
    ``DataCombiningBaseline`` strategy; this step only wires the inputs,
    logs the result and propagates failures.

    Args:
        df_movie: First frame passed to ``DataPreprocessing`` (presumably
            movie metadata -- confirm against the upstream step).
        df_user: Second frame passed to ``DataPreprocessing`` (presumably
            user metadata -- confirm against the upstream step).
        df_rating: Third frame passed to ``DataPreprocessing`` (presumably
            user/movie ratings -- confirm against the upstream step).

    Returns:
        Whatever ``DataPreprocessing.handle_data()`` returns, annotated as a
        ``pd.DataFrame`` and labelled ``"df_total"`` in the return annotation.

    Raises:
        Exception: Any error raised while building the strategy or handling
            the data is logged and then re-raised unchanged.
    """
    try:
        process_strategy = DataCombiningBaseline()
        data_preprocess_class = DataPreprocessing(
            df_movie, df_user, df_rating, process_strategy
        )
        processed_data = data_preprocess_class.handle_data()
        # Lazy %-formatting: the frame is only rendered if INFO is enabled.
        logging.info("df_test: \n%s", processed_data)
        return processed_data
    except Exception as e:
        # logging.exception logs at ERROR level and appends the traceback,
        # which plain logging.error would drop.
        logging.exception("Error in handling data: %s", e)
        # Bare raise re-raises the active exception with its original traceback.
        raise