|
import os |
|
from pathlib import Path |
|
from typing import Tuple |
|
|
|
import pandas as pd |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_train_test_df(is_testing: bool = False) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the train and test dataframes from CSV files.

    When ``is_testing`` is true, the sample files under ``tests/data`` (relative
    to the current working directory) are read. Otherwise ``train.csv`` and
    ``test.csv`` are read from the directory named by the ``DATA_PATH``
    environment variable.

    :param is_testing: If set to true, load subsample of train/test dataframes
    :return: Train and test dataframes, in that order
    :raises KeyError: If ``is_testing`` is false and ``DATA_PATH`` is not set
    :raises FileNotFoundError: If either CSV file does not exist (this is a
        subclass of ``OSError``, so callers catching ``OSError`` still work)
    """
    if is_testing:
        train_df_path = Path("tests/data/train_sample.csv")
        test_df_path = Path("tests/data/test_sample.csv")
    else:
        # Read the environment variable once so both paths share one root and
        # a missing variable produces a single, explanatory error.
        try:
            data_dir = Path(os.environ['DATA_PATH'])
        except KeyError:
            raise KeyError(
                "DATA_PATH environment variable must be set to the directory "
                "containing train.csv and test.csv"
            ) from None
        train_df_path = data_dir / 'train.csv'
        test_df_path = data_dir / 'test.csv'

    # Validate both files before reading anything, so a missing file fails fast
    # instead of after the other CSV has already been parsed. The test file is
    # checked first, so it is the one reported when both are missing.
    for csv_path in (test_df_path, train_df_path):
        if not csv_path.is_file():
            raise FileNotFoundError(f"File not found: {csv_path.absolute()}")

    train_df = pd.read_csv(train_df_path)
    test_df = pd.read_csv(test_df_path)

    return train_df, test_df