import os from pathlib import Path from typing import Tuple import pandas as pd # from dotenv import load_dotenv # # load_dotenv() # def set_env_if_kaggle_environ() -> None: # if 'KAGGLE_DATA_PROXY_TOKEN' in os.environ: # os.environ['DATA_PATH'] = '/kaggle/input/feedback-prize-english-language-learning/' def load_train_test_df(is_testing: bool = False) -> Tuple[pd.DataFrame, pd.DataFrame]: """Loads train/test dataframes :param is_testing: If set to true, load subsample of train/test dataframes :return Train and test dataframes """ # set_env_if_kaggle_environ() if is_testing: train_df_path = Path("tests/data/train_sample.csv") test_df_path = Path("tests/data/test_sample.csv") else: train_df_path = Path(os.environ['DATA_PATH']) / 'train.csv' test_df_path = Path(os.environ['DATA_PATH']) / 'test.csv' if not test_df_path.is_file(): raise OSError(f"File not found: {test_df_path.absolute()}") if not train_df_path.is_file(): raise OSError(f"File not found: {train_df_path.absolute()}") train_df = pd.read_csv(train_df_path) test_df = pd.read_csv(test_df_path) return train_df, test_df