Dean committed on
Commit
7200298
1 Parent(s): 5ad6755

First finalized pipeline

Browse files

TODO: Add checkpointing capability

dvc.lock CHANGED
@@ -3,8 +3,8 @@ process_data:
3
  src/data/processed
4
  deps:
5
  - path: src/code/make_dataset.py
6
- md5: 3a0dee3a1ba9c587b8ca6ea6f0447ada
7
- size: 5227
8
  - path: src/data/raw/nyu_depth_v2_labeled.mat
9
  md5: 520609c519fba3ba5ac58c8fefcc3530
10
  size: 2972037809
 
3
  src/data/processed
4
  deps:
5
  - path: src/code/make_dataset.py
6
+ md5: e069c7323c9be16baedd8f988375e145
7
+ size: 5256
8
  - path: src/data/raw/nyu_depth_v2_labeled.mat
9
  md5: 520609c519fba3ba5ac58c8fefcc3530
10
  size: 2972037809
dvc.yaml CHANGED
@@ -11,21 +11,26 @@ stages:
11
  train:
12
  cmd: python3 src/code/training.py src/data/processed/train
13
  deps:
 
 
14
  - src/code/training.py
15
  - src/data/processed/train
16
  outs:
17
  - src/models/
 
 
18
  metrics:
19
  - logs/train_metrics.csv:
20
  cache: false
21
  eval:
22
  cmd: python3 src/code/eval.py src/data/processed/test
23
  deps:
 
 
 
24
  - src/code/eval.py
25
  - src/models/model.pth
26
  - src/data/processed/test
27
- outs:
28
- - src/eval/
29
  metrics:
30
  - logs/test_metrics.csv:
31
  cache: false
 
11
  train:
12
  cmd: python3 src/code/training.py src/data/processed/train
13
  deps:
14
+ - src/code/custom_data_loading.py
15
+ - src/code/params.yml
16
  - src/code/training.py
17
  - src/data/processed/train
18
  outs:
19
  - src/models/
20
+ - logs/train_params.yml:
21
+ cache: false
22
  metrics:
23
  - logs/train_metrics.csv:
24
  cache: false
25
  eval:
26
  cmd: python3 src/code/eval.py src/data/processed/test
27
  deps:
28
+ - src/code/params.yml
29
+ - src/code/custom_data_loading.py
30
+ - src/code/eval_metric_calculation.py
31
  - src/code/eval.py
32
  - src/models/model.pth
33
  - src/data/processed/test
 
 
34
  metrics:
35
  - logs/test_metrics.csv:
36
  cache: false
src/code/custom_data_loading.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from fastai.vision.all import \
2
  DataLoaders, \
3
  delegates, \
@@ -7,7 +8,8 @@ from fastai.vision.all import \
7
  PILImageBW, \
8
  RandomSplitter, \
9
  Path, \
10
- get_files
 
11
 
12
 
13
  class ImageImageDataLoaders(DataLoaders):
@@ -33,13 +35,24 @@ def get_y_fn(x):
33
  return y
34
 
35
 
36
- def create_data(data_path):
 
 
 
37
  filenames = get_files(data_path, extensions='.jpg')
38
  if len(filenames) == 0:
39
  raise ValueError("Could not find any files in the given path")
40
  dataset = ImageImageDataLoaders.from_label_func(data_path,
41
- seed=42,
42
- bs=4, num_workers=0,
 
43
  filenames=filenames,
44
  label_func=get_y_fn)
 
 
 
 
 
 
 
45
  return dataset
 
1
+ import yaml
2
  from fastai.vision.all import \
3
  DataLoaders, \
4
  delegates, \
 
8
  PILImageBW, \
9
  RandomSplitter, \
10
  Path, \
11
+ get_files, \
12
+ L
13
 
14
 
15
  class ImageImageDataLoaders(DataLoaders):
 
35
  return y
36
 
37
 
38
+ def create_data(data_path, is_test=False):
39
+ with open(r"./src/code/params.yml") as f:
40
+ params = yaml.safe_load(f)
41
+
42
  filenames = get_files(data_path, extensions='.jpg')
43
  if len(filenames) == 0:
44
  raise ValueError("Could not find any files in the given path")
45
  dataset = ImageImageDataLoaders.from_label_func(data_path,
46
+ seed=int(params['seed']),
47
+ bs=int(params['batch_size']),
48
+ num_workers=int(params['num_workers']),
49
  filenames=filenames,
50
  label_func=get_y_fn)
51
+
52
+ if is_test:
53
+ filenames = get_files(Path(data_path), extensions='.jpg')
54
+ test_files = L([Path(i) for i in filenames])
55
+ test_dl = dataset.test_dl(test_files, with_labels=True)
56
+ return dataset, test_dl
57
+
58
  return dataset
src/code/eval.py CHANGED
@@ -1,7 +1,8 @@
1
  import sys
2
- from fastai.vision.all import unet_learner, Path, resnet34, MSELossFlat, get_files, L, tuplify
3
- from src.code.custom_data_loading import create_data
4
- from src.code.eval_metric_calculation import compute_eval_metrics
 
5
  from dagshub import dagshub_logger
6
 
7
 
@@ -10,19 +11,24 @@ if __name__ == "__main__":
10
  print("usage: %s <test_data_path>" % sys.argv[0], file=sys.stderr)
11
  sys.exit(0)
12
 
 
 
 
13
  data_path = Path(sys.argv[1])
14
- data = create_data(data_path)
 
 
 
15
 
16
- filenames = get_files(Path(sys.argv[1]), extensions='.jpg')
17
- test_files = L([Path(i) for i in filenames])
18
- test_dl = data.test_dl(test_files, with_labels=True)
19
  learner = unet_learner(data,
20
- resnet34,
21
- n_out=3,
22
- loss_func=MSELossFlat(),
23
  path='src/',
24
  model_dir='models')
25
  learner = learner.load('model')
 
 
26
  inputs, predictions, targets, decoded = learner.get_preds(dl=test_dl,
27
  with_input=True,
28
  with_decoded=True)
@@ -31,6 +37,7 @@ if __name__ == "__main__":
31
  decoded_predictions = learner.dls.decode(inputs + tuplify(decoded))[1]
32
  decoded_targets = learner.dls.decode(inputs + tuplify(targets))[1]
33
 
 
34
  metrics = compute_eval_metrics(decoded_targets.numpy(), decoded_predictions.numpy())
35
 
36
  with dagshub_logger(
 
1
  import sys
2
+ import yaml
3
+ from fastai.vision.all import unet_learner, Path, resnet34, MSELossFlat, tuplify
4
+ from custom_data_loading import create_data
5
+ from eval_metric_calculation import compute_eval_metrics
6
  from dagshub import dagshub_logger
7
 
8
 
 
11
  print("usage: %s <test_data_path>" % sys.argv[0], file=sys.stderr)
12
  sys.exit(0)
13
 
14
+ with open(r"./src/code/params.yml") as f:
15
+ params = yaml.safe_load(f)
16
+
17
  data_path = Path(sys.argv[1])
18
+ data, test_dl = create_data(data_path, is_test=True)
19
+
20
+ arch = {'resnet34': resnet34}
21
+ loss = {'MSELossFlat': MSELossFlat()}
22
 
 
 
 
23
  learner = unet_learner(data,
24
+ arch.get(params['architecture']),
25
+ n_out=int(params['num_outs']),
26
+ loss_func=loss.get(params['loss_func']),
27
  path='src/',
28
  model_dir='models')
29
  learner = learner.load('model')
30
+
31
+ print("Running model on test data...")
32
  inputs, predictions, targets, decoded = learner.get_preds(dl=test_dl,
33
  with_input=True,
34
  with_decoded=True)
 
37
  decoded_predictions = learner.dls.decode(inputs + tuplify(decoded))[1]
38
  decoded_targets = learner.dls.decode(inputs + tuplify(targets))[1]
39
 
40
+ print("Calculating metrics...")
41
  metrics = compute_eval_metrics(decoded_targets.numpy(), decoded_predictions.numpy())
42
 
43
  with dagshub_logger(
src/code/params.yml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ seed: 42
2
+ data: nyu_depth_v2
3
+ batch_size: 4
4
+ num_workers: 0
5
+ weight_decay: 1e-2
6
+ learning_rate: 1e-3
7
+ epochs: 1
8
+ num_outs: 3
9
+ source_dir: src
10
+ model_dir: models
11
+ architecture: resnet34
12
+ loss_func: MSELossFlat
13
+ train_metric: rmse
src/code/training.py CHANGED
@@ -3,8 +3,9 @@ Receives 1 arguments from argparse:
3
  <data_path> - Path to the dataset which is split into 2 folders - train and test.
4
  """
5
  import sys
 
6
  from fastai.vision.all import unet_learner, Path, resnet34, rmse, MSELossFlat
7
- from src.code.custom_data_loading import create_data
8
  from dagshub.fastai import DAGsHubLogger
9
 
10
 
@@ -14,23 +15,30 @@ if __name__ == "__main__":
14
  print("usage: %s <data_path>" % sys.argv[0], file=sys.stderr)
15
  sys.exit(0)
16
 
 
 
 
17
  data = create_data(Path(sys.argv[1]))
18
- wd, lr, ep = 1e-2, 1e-3, 1
 
 
 
 
19
  learner = unet_learner(data,
20
- resnet34,
21
- metrics=rmse,
22
- wd=wd,
23
- n_out=3,
24
- loss_func=MSELossFlat(),
25
- path='src/',
26
- model_dir='models',
27
  cbs=DAGsHubLogger(
28
  metrics_path="logs/train_metrics.csv",
29
- hparams_path="logs/train_params.yml"
30
- ))
31
 
32
  print("Training model...")
33
- learner.fine_tune(epochs=ep, base_lr=lr)
 
34
  print("Saving model...")
35
  learner.save('model')
36
  print("Done!")
 
3
  <data_path> - Path to the dataset which is split into 2 folders - train and test.
4
  """
5
  import sys
6
+ import yaml
7
  from fastai.vision.all import unet_learner, Path, resnet34, rmse, MSELossFlat
8
+ from custom_data_loading import create_data
9
  from dagshub.fastai import DAGsHubLogger
10
 
11
 
 
15
  print("usage: %s <data_path>" % sys.argv[0], file=sys.stderr)
16
  sys.exit(0)
17
 
18
+ with open(r"./src/code/params.yml") as f:
19
+ params = yaml.safe_load(f)
20
+
21
  data = create_data(Path(sys.argv[1]))
22
+
23
+ metrics = {'rmse': rmse}
24
+ arch = {'resnet34': resnet34}
25
+ loss = {'MSELossFlat': MSELossFlat()}
26
+
27
  learner = unet_learner(data,
28
+ arch.get(params['architecture']),
29
+ metrics=metrics.get(params['train_metric']),
30
+ wd=float(params['weight_decay']),
31
+ n_out=int(params['num_outs']),
32
+ loss_func=loss.get(params['loss_func']),
33
+ path=params['source_dir'],
34
+ model_dir=params['model_dir'],
35
  cbs=DAGsHubLogger(
36
  metrics_path="logs/train_metrics.csv",
37
+ hparams_path="logs/train_params.yml"))
 
38
 
39
  print("Training model...")
40
+ learner.fine_tune(epochs=int(params['epochs']),
41
+ base_lr=float(params['learning_rate']))
42
  print("Saving model...")
43
  learner.save('model')
44
  print("Done!")