Dean committed on
Commit
ec2a2c2
1 Parent(s): d5a6d18

fix visualization stage.

Browse files

HF upload not tested in current version

Makefile CHANGED
@@ -48,15 +48,15 @@ pull:
48
 
49
  ## run the DVC pipeline - recompute any modified outputs such as processed data or trained models
50
  run:
51
- dvc repro dvc.yaml eval
52
 
53
  ## run the visualization using Streamlit
54
  visualize:
55
- dvc repro dvc.yaml visualize
56
 
57
  ## push the trained model to HF model hub
58
  push_to_hf_hub:
59
- dvc repro dvc.yaml push_to_hf_hub
60
 
61
  #################################################################################
62
  # PROJECT RULES #
48
 
49
  ## run the DVC pipeline - recompute any modified outputs such as processed data or trained models
50
  run:
51
+ dvc repro eval
52
 
53
  ## run the visualization using Streamlit
54
  visualize:
55
+ dvc repro visualize
56
 
57
  ## push the trained model to HF model hub
58
  push_to_hf_hub:
59
+ dvc repro push_to_hf_hub
60
 
61
  #################################################################################
62
  # PROJECT RULES #
data_params.yml CHANGED
@@ -1,2 +1,2 @@
1
  data: cnn_dailymail
2
- split: 0.01
1
  data: cnn_dailymail
2
+ split: 0.001
dvc.lock CHANGED
@@ -4,48 +4,45 @@ stages:
4
  cmd: python src/models/train_model.py
5
  deps:
6
  - path: data/processed/train.csv
7
- md5: 51edd724b75a8e99a78b9138f8f37c60
8
- size: 25012573
9
  - path: data/processed/validation.csv
10
- md5: 0900e2bb330df94cb045faddd0b945d1
11
- size: 1138285
12
- - path: params.yml
13
- md5: 200ce3c4d9f2e8b9eb040ef93eb22757
14
- size: 189
15
  - path: src/models/train_model.py
16
- md5: d57b5ff84bc29a8ea75e191027d70148
17
- size: 988
18
  outs:
19
  - path: models
20
- md5: ff6de43e1d1f4d7c3d0bb3b551c1085f.dir
21
- size: 486952666
22
- nfiles: 10
23
  - path: reports/training_metrics.csv
24
- md5: 62f71f6ba5390e07bc70e90ac3f1f0e8
25
- size: 727
26
- - path: reports/training_params.yml
27
- md5: 075736962fab2a5e5b3ff189c13e101b
28
- size: 16
29
  eval:
30
  cmd: python src/models/evaluate_model.py
31
  deps:
32
  - path: data/processed/test.csv
33
- md5: 3cb7b63891f12d53b3ef3e81a2e93f8e
34
- size: 986944
 
 
 
35
  - path: models
36
- md5: ff6de43e1d1f4d7c3d0bb3b551c1085f.dir
37
- size: 486952666
38
- nfiles: 10
39
- - path: params.yml
40
- md5: 200ce3c4d9f2e8b9eb040ef93eb22757
41
- size: 189
42
  - path: src/models/evaluate_model.py
43
- md5: 55d3aac9c8f024f7d2eb8ad5e0ae87ae
44
- size: 688
45
  outs:
46
- - path: reports/metrics.csv
47
- md5: e618e8c26e0def4e33abcad08ac35ac9
48
- size: 1690
49
  process_data:
50
  cmd: python src/data/process_data.py
51
  deps:
@@ -53,33 +50,47 @@ stages:
53
  md5: 2ab20ac1b58df875a590b07d0e04eb5b.dir
54
  size: 1358833013
55
  nfiles: 3
56
- - path: params.yml
57
- md5: 160cbfd0ed8f87c9c5cb28fbeef1072d
58
- size: 266
59
  - path: src/data/process_data.py
60
- md5: 5b6aaadc5a628979956d502b4fb4ebf2
61
- size: 516
62
  outs:
63
  - path: data/processed/test.csv
64
- md5: 5f2bfb37d55a13ead3c81564dbee2fd5
65
- size: 508508
66
  - path: data/processed/train.csv
67
- md5: 707c5ed455a15ec48965daf92fed7df6
68
- size: 12653913
69
  - path: data/processed/validation.csv
70
- md5: 1e021dc163cc87a32cef74a98e4a0d51
71
- size: 558403
72
  download_data:
73
  cmd: python src/data/make_dataset.py
74
  deps:
75
- - path: params.yml
76
- md5: 160cbfd0ed8f87c9c5cb28fbeef1072d
77
- size: 266
78
  - path: src/data/make_dataset.py
79
- md5: 075c6233f8732eedf7915732f9a8ebfd
80
- size: 771
81
  outs:
82
  - path: data/raw
83
  md5: 2ab20ac1b58df875a590b07d0e04eb5b.dir
84
  size: 1358833013
85
  nfiles: 3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  cmd: python src/models/train_model.py
5
  deps:
6
  - path: data/processed/train.csv
7
+ md5: 5331b9c32b2d097d8d7aca01de5524bc
8
+ size: 1198262
9
  - path: data/processed/validation.csv
10
+ md5: 6069153a075b00dfb6d9e0843dd2da89
11
+ size: 52739
12
+ - path: model_params.yml
13
+ md5: 9fcf006ee30f2b751078598a3fba9bb5
14
+ size: 235
15
  - path: src/models/train_model.py
16
+ md5: f7d1121426c3d5530c2b9697cb7ac74a
17
+ size: 951
18
  outs:
19
  - path: models
20
+ md5: fc37870a93db61b94af9f0847577f09b.dir
21
+ size: 243476333
22
+ nfiles: 5
23
  - path: reports/training_metrics.csv
24
+ md5: 0b6c1518aed802bea976e883caac2a90
25
+ size: 320
 
 
 
26
  eval:
27
  cmd: python src/models/evaluate_model.py
28
  deps:
29
  - path: data/processed/test.csv
30
+ md5: 3eec94ac211c76363a3d968663b82d02
31
+ size: 39574
32
+ - path: model_params.yml
33
+ md5: 9fcf006ee30f2b751078598a3fba9bb5
34
+ size: 235
35
  - path: models
36
+ md5: fc37870a93db61b94af9f0847577f09b.dir
37
+ size: 243476333
38
+ nfiles: 5
 
 
 
39
  - path: src/models/evaluate_model.py
40
+ md5: 89edb77aaab3055605ae6db2e21eab82
41
+ size: 705
42
  outs:
43
+ - path: reports/evaluation_metrics.csv
44
+ md5: a5fa12e6df10884217614c007d146a26
45
+ size: 2122
46
  process_data:
47
  cmd: python src/data/process_data.py
48
  deps:
50
  md5: 2ab20ac1b58df875a590b07d0e04eb5b.dir
51
  size: 1358833013
52
  nfiles: 3
53
+ - path: data_params.yml
54
+ md5: a68eabf79c3b3e28afb05baa1944bbc7
55
+ size: 32
56
  - path: src/data/process_data.py
57
+ md5: 68db554a69a0c8ce807907afa2be5e9c
58
+ size: 521
59
  outs:
60
  - path: data/processed/test.csv
61
+ md5: 3eec94ac211c76363a3d968663b82d02
62
+ size: 39574
63
  - path: data/processed/train.csv
64
+ md5: 5331b9c32b2d097d8d7aca01de5524bc
65
+ size: 1198262
66
  - path: data/processed/validation.csv
67
+ md5: 6069153a075b00dfb6d9e0843dd2da89
68
+ size: 52739
69
  download_data:
70
  cmd: python src/data/make_dataset.py
71
  deps:
72
+ - path: data_params.yml
73
+ md5: a68eabf79c3b3e28afb05baa1944bbc7
74
+ size: 32
75
  - path: src/data/make_dataset.py
76
+ md5: a0667f4ad8c06551609bd0bf950167b7
77
+ size: 776
78
  outs:
79
  - path: data/raw
80
  md5: 2ab20ac1b58df875a590b07d0e04eb5b.dir
81
  size: 1358833013
82
  nfiles: 3
83
+ visualize:
84
+ cmd: streamlit run src/visualization/visualize.py
85
+ deps:
86
+ - path: models
87
+ md5: fc37870a93db61b94af9f0847577f09b.dir
88
+ size: 243476333
89
+ nfiles: 5
90
+ - path: src/visualization/visualize.py
91
+ md5: a71303fef593a9fd275fc4964623baf8
92
+ size: 814
93
+ outs:
94
+ - path: reports/visualization_metrics.txt
95
+ md5: fd7b6bb170dbaa9ef1076bc8be7e7593
96
+ size: 2144
dvc.yaml CHANGED
@@ -50,9 +50,6 @@ stages:
50
  deps:
51
  - models
52
  - src/visualization/visualize.py
53
- metrics:
54
- - reports/visualization_metrics.txt:
55
- cache: false
56
  push_to_hf_hub:
57
  cmd: python src/models/hf_upload.py
58
  deps:
50
  deps:
51
  - models
52
  - src/visualization/visualize.py
 
 
 
53
  push_to_hf_hub:
54
  cmd: python src/models/hf_upload.py
55
  deps:
reports/evaluation_metrics.csv CHANGED
@@ -1,5 +1,37 @@
1
  Name,Value,Timestamp,Step
2
- "Rouge 1","{'Rouge_1 Low Precision': 0.34885388166790793, 'Rouge_1 Low recall': 0.28871556132198656, 'Rouge_1 Low F1': 0.31058637096822267, 'Rouge_1 Mid Precision': 0.412435004251884, 'Rouge_1 Mid recall': 0.3386352228897427, 'Rouge_1 Mid F1': 0.3517931748124066, 'Rouge_1 High Precision': 0.47625451117848977, 'Rouge_1 High recall': 0.39086727645312935, 'Rouge_1 High F1': 0.3959993953753958}",1627559683895,1
3
- "Rouge 2","{'Rouge_2 Low Precision': 0.1259156300716482, 'Rouge_2 Low recall': 0.10333119800163641, 'Rouge_2 Low F1': 0.10992592662502373, 'Rouge_2 Mid Precision': 0.16879303949162833, 'Rouge_2 Mid recall': 0.13805319188028575, 'Rouge_2 Mid F1': 0.14400796293585816, 'Rouge_2 High Precision': 0.21844214485938712, 'Rouge_2 High recall': 0.1777722350788, 'Rouge_2 High F1': 0.18342627795315522}",1627559683895,1
4
- "Rouge L","{'Rouge_L Low Precision': 0.2322041975032734, 'Rouge_L Low recall': 0.194000575085051, 'Rouge_L Low F1': 0.20468107864660212, 'Rouge_L Mid Precision': 0.2797360675037497, 'Rouge_L Mid recall': 0.22647774162854406, 'Rouge_L Mid F1': 0.2361293941929179, 'Rouge_L High Precision': 0.3357160682858357, 'Rouge_L High recall': 0.2622222798536235, 'Rouge_L High F1': 0.27267217209978356}",1627559683895,1
5
- "rougeLsum","{'rougeLsum Low Precision': 0.29651536760563263, 'rougeLsum Low recall': 0.2432094838451322, 'rougeLsum Low F1': 0.26048483356867896, 'rougeLsum Mid Precision': 0.35317671791338556, 'rougeLsum Mid recall': 0.286187817596869, 'rougeLsum Mid F1': 0.2985727815225495, 'rougeLsum High Precision': 0.4134539668577922, 'rougeLsum High recall': 0.3365998852405162, 'rougeLsum High F1': 0.3454898564714797}",1627559683895,1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  Name,Value,Timestamp,Step
2
+ "Rouge_1 Low Precision",0.23786550570641482,1628194352980,1
3
+ "Rouge_1 Low recall",0.23355396379384713,1628194352980,1
4
+ "Rouge_1 Low F1",0.23602599457077003,1628194352980,1
5
+ "Rouge_1 Mid Precision",0.3569471852499436,1628194352980,1
6
+ "Rouge_1 Mid recall",0.31915939075819916,1628194352980,1
7
+ "Rouge_1 Mid F1",0.3317618573023773,1628194352980,1
8
+ "Rouge_1 High Precision",0.4726861301480842,1628194352980,1
9
+ "Rouge_1 High recall",0.4019654200001146,1628194352980,1
10
+ "Rouge_1 High F1",0.4298956952594035,1628194352980,1
11
+ "Rouge_2 Low Precision",0.06184772400193972,1628194352980,1
12
+ "Rouge_2 Low recall",0.05626972412346313,1628194352980,1
13
+ "Rouge_2 Low F1",0.058680298802341754,1628194352980,1
14
+ "Rouge_2 Mid Precision",0.1367034298993256,1628194352980,1
15
+ "Rouge_2 Mid recall",0.11953160646342464,1628194352980,1
16
+ "Rouge_2 Mid F1",0.12485064123505887,1628194352980,1
17
+ "Rouge_2 High Precision",0.22739029631016827,1628194352980,1
18
+ "Rouge_2 High recall",0.18851628169809986,1628194352980,1
19
+ "Rouge_2 High F1",0.20306657551189072,1628194352980,1
20
+ "Rouge_L Low Precision",0.18248956154159507,1628194352980,1
21
+ "Rouge_L Low recall",0.18048774357814204,1628194352980,1
22
+ "Rouge_L Low F1",0.18151380309623336,1628194352980,1
23
+ "Rouge_L Mid Precision",0.2614974838710314,1628194352980,1
24
+ "Rouge_L Mid recall",0.24286688705755238,1628194352980,1
25
+ "Rouge_L Mid F1",0.24674586991996245,1628194352980,1
26
+ "Rouge_L High Precision",0.3574471638807763,1628194352980,1
27
+ "Rouge_L High recall",0.30836083808542225,1628194352980,1
28
+ "Rouge_L High F1",0.32385446385474176,1628194352980,1
29
+ "rougeLsum Low Precision",0.21468633089019287,1628194352980,1
30
+ "rougeLsum Low recall",0.2057771050364415,1628194352980,1
31
+ "rougeLsum Low F1",0.21170611912426093,1628194352980,1
32
+ "rougeLsum Mid Precision",0.3060593850789648,1628194352980,1
33
+ "rougeLsum Mid recall",0.27733553744690076,1628194352980,1
34
+ "rougeLsum Mid F1",0.28530501988436374,1628194352980,1
35
+ "rougeLsum High Precision",0.4094614601758424,1628194352980,1
36
+ "rougeLsum High recall",0.34640369291505535,1628194352980,1
37
+ "rougeLsum High F1",0.36454440079714096,1628194352980,1
reports/training_metrics.csv CHANGED
@@ -1,11 +1,9 @@
1
  Name,Value,Timestamp,Step
2
- "val_loss",5.029108047485352,1628177741756,14
3
- "epoch",0,1628177741756,14
4
- "val_loss",4.757647514343262,1628177893078,29
5
- "epoch",1,1628177893078,29
6
- "val_loss",4.493412494659424,1628177940684,44
7
- "epoch",2,1628177940684,44
8
- "train_loss",1.328701138496399,1628178045108,49
9
- "epoch",3,1628178045108,49
10
- "val_loss",4.228608131408691,1628178200552,59
11
- "epoch",3,1628178200552,59
1
  Name,Value,Timestamp,Step
2
+ "val_loss",2.615034580230713,1628194199660,0
3
+ "epoch",0,1628194199660,0
4
+ "val_loss",2.6141018867492676,1628194229556,1
5
+ "epoch",1,1628194229556,1
6
+ "val_loss",2.6132164001464844,1628194259447,2
7
+ "epoch",2,1628194259447,2
8
+ "val_loss",2.612450361251831,1628194289914,3
9
+ "epoch",3,1628194289914,3
 
 
src/__init__.py DELETED
@@ -1,12 +0,0 @@
1
- import os # noqa: F401
2
- import sys # noqa: F401
3
-
4
- from src.data.make_dataset import make_dataset # noqa: F401
5
- from src.data.process_data import process_data # noqa: F401
6
- from src.models.evaluate_model import evaluate_model # noqa: F401
7
- from src.models.model import Summarization # noqa: F401
8
- from src.models.predict_model import predict_model # noqa: F401
9
- from src.models.train_model import train_model # noqa: F401
10
- from src.visualization.visualize import visualize # noqa: F401
11
-
12
- sys.path.append(os.path.dirname(os.path.realpath(__file__))) # noqa: F401
 
 
 
 
 
 
 
 
 
 
 
 
src/data/__init__.py DELETED
File without changes
src/models/__init__.py DELETED
@@ -1,4 +0,0 @@
1
- from .model import Summarization # noqa: F401
2
- from .train_model import train_model # noqa: F401
3
- from .predict_model import predict_model # noqa: F401
4
- from .evaluate_model import evaluate_model # noqa: F401
 
 
 
 
src/models/predict_model.py CHANGED
@@ -1,6 +1,6 @@
1
  import yaml
2
 
3
- from model import Summarization
4
  import pandas as pd
5
 
6
 
1
  import yaml
2
 
3
+ from src.models.model import Summarization
4
  import pandas as pd
5
 
6
 
src/visualization/__init__.py DELETED
File without changes
src/visualization/visualize.py CHANGED
@@ -1,6 +1,6 @@
1
  import streamlit as st
2
 
3
- from ..models import predict_model
4
 
5
 
6
  def visualize():
@@ -18,9 +18,6 @@ def visualize():
18
  sumtext = predict_model(text=text)
19
  st.write("# Generated Summary:")
20
  st.write("{}".format(sumtext))
21
- with open("reports/visualization_metrics.txt", "w") as file1:
22
- file1.writelines(text)
23
- file1.writelines(sumtext)
24
 
25
 
26
  if __name__ == "__main__":
1
  import streamlit as st
2
 
3
+ from src.models.predict_model import predict_model
4
 
5
 
6
  def visualize():
18
  sumtext = predict_model(text=text)
19
  st.write("# Generated Summary:")
20
  st.write("{}".format(sumtext))
 
 
 
21
 
22
 
23
  if __name__ == "__main__":