"""Strategies for turning raw movie/user/rating tables into model-ready data."""

import logging
import os
from abc import ABC, abstractmethod
from typing import Union

import numpy as np
import pandas as pd
# NOTE(review): train_test_split is not used in the visible part of this
# module; the import is kept in case other code relies on it.
from sklearn.model_selection import train_test_split

# Directory containing this module; printed at import time.
current_directory = os.path.dirname(os.path.abspath(__file__))
print(current_directory)


class DataStrategy(ABC):
    """Abstract interface for a data-handling strategy.

    Concrete strategies receive the movie, user and rating tables and return
    the processed data.
    """

    @abstractmethod
    def handle_data(
        self,
        df_movie: pd.DataFrame,
        df_user: pd.DataFrame,
        df_rating: pd.DataFrame,
    ) -> Union[pd.DataFrame, pd.Series]:
        """Process the three input tables and return the resulting data.

        Args:
            df_movie: Movie table.
            df_user: User table.
            df_rating: Rating table.

        Returns:
            The processed data as a DataFrame or Series.
        """


class DataCombiningBaseline(DataStrategy):
    """Baseline strategy: reshape the ratings into a movie-by-user table."""

    def handle_data(
        self,
        df_movie: pd.DataFrame,
        df_user: pd.DataFrame,
        df_rating: pd.DataFrame,
    ) -> Union[pd.DataFrame, pd.Series]:
        """Pivot ``df_rating`` into one row per movie and one column per user.

        Only ``df_rating`` is read; ``df_movie`` and ``df_user`` are accepted
        to satisfy the ``DataStrategy`` interface.

        Args:
            df_movie: Unused by this strategy.
            df_user: Unused by this strategy.
            df_rating: Must contain the columns ``movie_id``, ``user_id`` and
                ``rating``.

        Returns:
            A DataFrame with a ``movie_id`` column followed by one column per
            distinct ``user_id``; cells hold the rating, or NaN where the
            user has not rated the movie.

        Raises:
            KeyError: If a required column is missing from ``df_rating``.
            ValueError: Raised by ``DataFrame.pivot`` when the same
                (movie_id, user_id) pair occurs more than once.
        """
        result_df = df_rating.pivot(
            index='movie_id', columns='user_id', values='rating'
        )

        # Turn movie_id back into an ordinary column.
        result_df.reset_index(inplace=True)

        # Missing ratings stay NaN. Change the fill value here if another
        # placeholder (e.g. 0) is preferred.
        result_df.fillna(np.nan, inplace=True)

        # Drop the leftover 'user_id' label on the columns index.
        result_df.columns.name = None

        return result_df


class DataPreprocessing:
    """Apply a ``DataStrategy`` to the movie, user and rating tables.

    NOTE(review): the original description also mentions dividing the data
    into train and test sets; no split is performed in the code visible
    here.
    """

    def __init__(
        self,
        df_movie: pd.DataFrame,
        df_user: pd.DataFrame,
        df_rating: pd.DataFrame,
        strategy: DataStrategy,
    ):
        """Store the input tables and the strategy that will process them.

        Args:
            df_movie: Movie table.
            df_user: User table.
            df_rating: Rating table.
            strategy: Strategy object whose ``handle_data`` is delegated to.
        """
        self.df_movie = df_movie
        self.df_user = df_user
        self.df_rating = df_rating
        self.strategy = strategy

    def handle_data(self) -> Union[pd.DataFrame, pd.Series]:
        """Run the configured strategy on the stored tables.

        Returns:
            Whatever the strategy's ``handle_data`` returns.

        Raises:
            Exception: Any error raised by the strategy is logged and then
                re-raised unchanged.
        """
        try:
            return self.strategy.handle_data(
                self.df_movie, self.df_user, self.df_rating
            )
        except Exception as e:
            # `logging` must be imported for this handler to work; without it
            # the real error is masked by a NameError.
            logging.error("Error in handling data: %s", e)
            # Bare raise keeps the original exception and traceback.
            raise