# Spaces: Runtime error  (page banner captured along with the code; not part of the module)
import logging
import os
from abc import ABC, abstractmethod
from typing import Union

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
# Absolute path of the directory that contains this module file.
current_directory = os.path.dirname(os.path.abspath(__file__))
# Echo the resolved directory when the module is loaded (existing behaviour, kept as-is).
print(current_directory)
class DataStrategy(ABC):
    """
    Abstract base class defining the strategy interface for handling data.

    Concrete strategies must override ``handle_data``; the base class itself
    cannot be instantiated.
    """

    @abstractmethod
    def handle_data(self, df_movie: pd.DataFrame, df_user: pd.DataFrame, df_rating: pd.DataFrame) -> Union[pd.DataFrame, pd.Series]:
        """
        Transform the given frames into the data structure the strategy produces.

        Args:
            df_movie: Movie frame handed to the strategy.
            df_user: User frame handed to the strategy.
            df_rating: Rating frame handed to the strategy.

        Returns:
            A DataFrame or Series, as produced by the concrete strategy.
        """
class DataCombiningBaseline(DataStrategy):
    """
    Baseline strategy: reshape the rating frame into a movie-by-user table.
    """

    def handle_data(self, df_movie: pd.DataFrame, df_user: pd.DataFrame, df_rating: pd.DataFrame) -> Union[pd.DataFrame, pd.Series]:
        """
        Pivot ``df_rating`` so each row is a movie and each column is a user.

        Args:
            df_movie: Not used by this strategy; accepted to satisfy the interface.
            df_user: Not used by this strategy; accepted to satisfy the interface.
            df_rating: Frame with ``movie_id``, ``user_id`` and ``rating`` columns.

        Returns:
            A DataFrame with a ``movie_id`` column followed by one column per
            ``user_id`` holding that user's ``rating``. Combinations absent
            from ``df_rating`` are left as NaN by the pivot.

        Raises:
            ValueError: Raised by ``DataFrame.pivot`` when the same
                (``movie_id``, ``user_id``) pair occurs more than once.
        """
        ratings_matrix = df_rating.pivot(index='movie_id', columns='user_id', values='rating')
        # Turn movie_id from the index back into a regular column.
        ratings_matrix = ratings_matrix.reset_index()
        # The pivot already leaves missing combinations as NaN, so no fill step is needed.
        # Drop the leftover "user_id" label on the columns index.
        ratings_matrix.columns.name = None
        return ratings_matrix
class DataPreprocessing:
    """
    Holds the movie, user and rating frames together with a strategy and
    applies that strategy to them on request.
    """

    def __init__(self, df_movie: pd.DataFrame, df_user: pd.DataFrame, df_rating: pd.DataFrame, strategy: "DataStrategy"):
        """
        Store the input frames and the strategy that will process them.

        Args:
            df_movie: Movie frame passed through to the strategy.
            df_user: User frame passed through to the strategy.
            df_rating: Rating frame passed through to the strategy.
            strategy: Object whose ``handle_data`` does the actual work.
        """
        self.df_movie = df_movie
        self.df_user = df_user
        self.df_rating = df_rating
        self.strategy = strategy

    def handle_data(self) -> Union[pd.DataFrame, pd.Series]:
        """
        Run the configured strategy on the stored frames.

        Returns:
            Whatever the strategy's ``handle_data`` returns.

        Raises:
            Exception: Any exception raised by the strategy is logged and
                then re-raised unchanged.
        """
        try:
            return self.strategy.handle_data(self.df_movie, self.df_user, self.df_rating)
        except Exception as e:
            logging.error(f"Error in handling data: {e}")
            # Bare raise keeps the original exception and traceback intact.
            raise