Booguy committed on
Commit
419b0a4
1 Parent(s): d438765

Create data_reader.py

Browse files
Files changed (1) hide show
  1. src/data_reader.py +42 -0
src/data_reader.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from typing import Tuple
4
+
5
+ import pandas as pd
6
+ from dotenv import load_dotenv
7
+
8
+ load_dotenv()
9
+
10
+
11
def set_env_if_kaggle_environ() -> None:
    """Point ``DATA_PATH`` at the Kaggle competition input directory.

    The presence of the ``KAGGLE_DATA_PROXY_TOKEN`` environment variable is
    used as the signal that the code is running inside Kaggle. When it is
    present, ``DATA_PATH`` is overwritten (even if already set) with the
    competition's input directory. Otherwise the environment is left as is.
    """
    if 'KAGGLE_DATA_PROXY_TOKEN' in os.environ:
        os.environ['DATA_PATH'] = '/kaggle/input/feedback-prize-english-language-learning/'
14
+
15
+
16
def load_train_test_df(is_testing: bool = False) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the train and test dataframes from CSV files.

    :param is_testing: If true, read the sample files under ``tests/data``
        (relative to the current working directory) instead of ``train.csv``
        and ``test.csv`` under the directory named by ``DATA_PATH``.
    :return: Tuple ``(train_df, test_df)``.
    :raises KeyError: If ``is_testing`` is false and the ``DATA_PATH``
        environment variable is not set.
    :raises FileNotFoundError: If either CSV file does not exist. This is a
        subclass of ``OSError``, so callers catching ``OSError`` still work.
    """
    # May override DATA_PATH when running inside Kaggle.
    set_env_if_kaggle_environ()

    if is_testing:
        train_df_path = Path("tests/data/train_sample.csv")
        test_df_path = Path("tests/data/test_sample.csv")
    else:
        data_dir = Path(os.environ['DATA_PATH'])
        train_df_path = data_dir / 'train.csv'
        test_df_path = data_dir / 'test.csv'

    # Validate both paths up front (test first, then train) so a missing file
    # is reported with its absolute path before any CSV parsing starts.
    for csv_path in (test_df_path, train_df_path):
        if not csv_path.is_file():
            raise FileNotFoundError(f"File not found: {csv_path.absolute()}")

    train_df = pd.read_csv(train_df_path)
    test_df = pd.read_csv(test_df_path)

    return train_df, test_df