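# Scraped page header (not part of the module): author avatar "Booguy",
# commit "Create data_reader.py" (419b0a4), viewer links "raw" / "history" / "blame",
# scan status "No virus", file size 1.2 kB.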
import os
from pathlib import Path
from typing import Tuple
import pandas as pd
from dotenv import load_dotenv
load_dotenv()
def set_env_if_kaggle_environ() -> None:
    """Point ``DATA_PATH`` at the Kaggle competition input directory.

    When the ``KAGGLE_DATA_PROXY_TOKEN`` environment variable is present, the
    ``DATA_PATH`` environment variable is overwritten with the competition's
    input directory. Otherwise the environment is left untouched.
    """
    if 'KAGGLE_DATA_PROXY_TOKEN' not in os.environ:
        return
    # Any existing DATA_PATH value is replaced when the Kaggle token is present.
    os.environ['DATA_PATH'] = '/kaggle/input/feedback-prize-english-language-learning/'
def load_train_test_df(is_testing: bool = False) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the train and test dataframes from CSV files.

    :param is_testing: If true, read the sample files
        ``tests/data/train_sample.csv`` and ``tests/data/test_sample.csv``
        (relative to the current working directory) instead of ``train.csv``
        and ``test.csv`` under the directory named by ``DATA_PATH``.
    :return: Tuple ``(train_df, test_df)``.
    :raises KeyError: If ``is_testing`` is false and the ``DATA_PATH``
        environment variable is not set.
    :raises FileNotFoundError: If either CSV file does not exist. This is a
        subclass of ``OSError``, so ``except OSError`` handlers still apply.
    """
    set_env_if_kaggle_environ()

    if is_testing:
        sample_dir = Path("tests/data")
        train_df_path = sample_dir / "train_sample.csv"
        test_df_path = sample_dir / "test_sample.csv"
    else:
        data_root = os.environ.get('DATA_PATH')
        if data_root is None:
            # Same exception type as a raw os.environ[...] lookup, but with a
            # message that tells the caller what to configure.
            raise KeyError(
                "DATA_PATH environment variable is not set; "
                "it must name the directory containing train.csv and test.csv"
            )
        data_dir = Path(data_root)
        train_df_path = data_dir / 'train.csv'
        test_df_path = data_dir / 'test.csv'

    # Validate both paths before reading anything. The test file is checked
    # first so that, when both are missing, the reported file is the same one
    # as before this change.
    for csv_path in (test_df_path, train_df_path):
        if not csv_path.is_file():
            raise FileNotFoundError(f"File not found: {csv_path.absolute()}")

    train_df = pd.read_csv(train_df_path)
    test_df = pd.read_csv(test_df_path)
    return train_df, test_df