"""Helpers for loading the dataset and splitting it into train/test sets."""

import logging

from pandas import DataFrame, Series, read_csv
from sklearn.model_selection import train_test_split

from src.config import DATASET_FILE_PATH

logger = logging.getLogger(__name__)


def get_dataset() -> DataFrame:
    """
    Get the dataset

    Reads the CSV file located at ``DATASET_FILE_PATH``. If the file does not
    exist, a warning is logged and an empty DataFrame is returned instead of
    raising, so callers should check ``df.empty`` before using the result.

    Returns:
        DataFrame: The dataset as a DataFrame (empty if the file is missing)
    """
    try:
        # read_csv already returns a DataFrame; no extra wrapping is needed.
        return read_csv(DATASET_FILE_PATH)
    except FileNotFoundError:
        # Best-effort fallback kept for existing callers, but no longer silent.
        logger.warning(
            "Dataset file not found at %s; returning an empty DataFrame",
            DATASET_FILE_PATH,
        )
        return DataFrame(data={})


def get_features_target(
    df: DataFrame, target_column: str = "TARGET"
) -> tuple[DataFrame, Series]:
    """
    Get the features and target from the dataset

    Args:
        df (DataFrame): The dataset as a DataFrame
        target_column (str): Name of the column holding the target.
            Defaults to "TARGET".

    Returns:
        tuple[DataFrame, Series]: The features (every column except the
            target column) and the target column, as a tuple

    Raises:
        KeyError: If ``target_column`` is not a column of ``df``
    """
    features = df.drop(columns=[target_column])
    target = df[target_column]
    return features, target


def get_train_test_sets(
    X: DataFrame,
    y: Series,
    *,
    test_size: float = 0.2,
    random_state: int = 42,
) -> tuple[DataFrame, Series, DataFrame, Series]:
    """
    Get the train and test sets from the features and target

    Args:
        X (DataFrame): The features as a DataFrame
        y (Series): The target as a Series
        test_size (float): Value forwarded to ``train_test_split`` as
            ``test_size``. Defaults to 0.2.
        random_state (int): Seed forwarded to ``train_test_split`` as
            ``random_state``. Defaults to 42.

    Returns:
        tuple[DataFrame, Series, DataFrame, Series]: The train and test sets
            in the order ``(X_train, y_train, X_test, y_test)``
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    # NOTE: the return order groups each split's features with its target,
    # which differs from the order train_test_split itself returns.
    return X_train, y_train, X_test, y_test