Sadashiv committed on
Commit
625ed08
1 Parent(s): e7df225

Upload 31 files

Files changed (33)
  1. .gitattributes +1 -0
  2. crop-recommendation +0 -1
  3. crop-recommendation/.gitignore +168 -0
  4. crop-recommendation/.vscode/extensions.json +10 -0
  5. crop-recommendation/.vscode/settings.json +8 -0
  6. crop-recommendation/.vscode/tasks.json +15 -0
  7. crop-recommendation/LICENSE +21 -0
  8. crop-recommendation/README.md +50 -0
  9. crop-recommendation/data_download.py +40 -0
  10. crop-recommendation/main.py +8 -0
  11. crop-recommendation/notebooks/crop-recommendation-notebook.ipynb +0 -0
  12. crop-recommendation/requirements.txt +11 -0
  13. crop-recommendation/saved_models/0/model/model.pkl +3 -0
  14. crop-recommendation/saved_models/0/target_encoder/target_encoder.pkl +0 -0
  15. crop-recommendation/saved_models/0/transformer/transformer.pkl +0 -0
  16. crop-recommendation/src/__init__.py +0 -0
  17. crop-recommendation/src/components/__init__.py +0 -0
  18. crop-recommendation/src/components/data_ingestion.py +73 -0
  19. crop-recommendation/src/components/data_trasformation.py +113 -0
  20. crop-recommendation/src/components/data_validation.py +159 -0
  21. crop-recommendation/src/components/model_evaluation.py +123 -0
  22. crop-recommendation/src/components/model_pusher.py +69 -0
  23. crop-recommendation/src/components/model_trainer.py +107 -0
  24. crop-recommendation/src/config.py +20 -0
  25. crop-recommendation/src/entity/__init__.py +0 -0
  26. crop-recommendation/src/entity/artifact_entity.py +40 -0
  27. crop-recommendation/src/entity/config_entity.py +120 -0
  28. crop-recommendation/src/exception.py +21 -0
  29. crop-recommendation/src/logger.py +22 -0
  30. crop-recommendation/src/pipeline/__init__.py +0 -0
  31. crop-recommendation/src/pipeline/training_pipeline.py +95 -0
  32. crop-recommendation/src/predictor.py +100 -0
  33. crop-recommendation/src/utils.py +106 -0
.gitattributes ADDED
@@ -0,0 +1 @@
1
+ crop-recommendation/saved_models/0/model/model.pkl filter=lfs diff=lfs merge=lfs -text
crop-recommendation DELETED
@@ -1 +0,0 @@
1
- Subproject commit ce875580acbce4044f62e37db955614203e1232b
crop-recommendation/.gitignore ADDED
@@ -0,0 +1,168 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+
162
+ data_dump.py
163
+ demo.ipynb
164
+ kaggle.json
165
+ crop-recommendation-dataset
166
+ catboost_info
167
+ temp.py
168
+ artifact
crop-recommendation/.vscode/extensions.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "recommendations": [
3
+ "mongodb.mongodb-vscode",
4
+ "ms-python.python",
5
+ "ms-toolsai.jupyter",
6
+ "ms-toolsai.jupyter-keymap",
7
+ "ms-toolsai.jupyter-renderers",
8
+ "formulahendry.code-runner"
9
+ ]
10
+ }
crop-recommendation/.vscode/settings.json ADDED
@@ -0,0 +1,8 @@
1
+ {
2
+ "workbench.colorTheme": "Default Dark+",
3
+ "workbench.preferredDarkColorTheme": "Default Dark+",
4
+ "task.allowAutomaticTasks": "on",
5
+ "workbench.editorAssociations": {
6
+ "*.md": "vscode.markdown.preview.editor"
7
+ }
8
+ }
crop-recommendation/.vscode/tasks.json ADDED
@@ -0,0 +1,15 @@
1
+ {
2
+ "version": "2.0.0",
3
+ "tasks": [
4
+ {
5
+ "label": "Installing extensions and dependencies...",
6
+ "type": "shell",
7
+ "command": "code-server --install-extension mongodb.mongodb-vscode --install-extension ms-python.python --install-extension formulahendry.code-runner && pip install -r requirements.txt",
8
+ "presentation": {
9
+ "reveal": "always",
10
+ "panel": "new"
11
+ },
12
+ "runOptions": { "runOn": "folderOpen" }
13
+ }
14
+ ]
15
+ }
crop-recommendation/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Sadashiv Nandanikar
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
crop-recommendation/README.md ADDED
@@ -0,0 +1,50 @@
1
+ # Crop Recommendation
2
+
3
+ #### Harnessing the capabilities of machine learning models, this project analyzes soil and environmental parameters to suggest the most suitable crops, optimizing yield and efficiency.
4
+
5
+ ## Demo
6
+ ### Input Interface
7
+ <img src="https://github.com/07Sada/crop-recommendation/assets/112761379/3f8c5f4d-1df4-4516-b428-f4b95a2cc5df" alt="Image 1" width="800">
8
+
9
+ ### Output Interface
10
+ <img src="https://github.com/07Sada/crop-recommendation/assets/112761379/86a4aefd-b973-40ad-b79c-f2b1dd070d91" alt="Image 1" width="800">
11
+
12
+ ## Data Source
13
+ This dataset contains information about the soil and environmental conditions that are ideal for growing different crops. The dataset includes the following columns:
14
+
15
+ - `N`: The ratio of nitrogen content in the soil.
16
+ - `P`: The ratio of phosphorus content in the soil.
17
+ - `K`: The ratio of potassium content in the soil.
18
+ - `Temperature`: The temperature in degrees Celsius.
19
+ - `Humidity`: The relative humidity in percent.
20
+ - `pH`: The pH value of the soil.
21
+ - `Rainfall`: The rainfall in millimeters.
22
+
23
+ [Link](https://www.kaggle.com/datasets/atharvaingle/crop-recommendation-dataset) for the dataset
24
+
25
+ <details>
26
+ <summary>Supported crops
27
+ </summary>
28
+
29
+ - Apple
30
+ - Blueberry
31
+ - Cherry
32
+ - Corn
33
+ - Grape
34
+ - Pepper
35
+ - Orange
36
+ - Peach
37
+ - Potato
38
+ - Soybean
39
+ - Strawberry
40
+ - Tomato
41
+ - Squash
42
+ - Raspberry
43
+ </details>
44
+
45
+ ## Project Details
46
+ This repository is a submodule of [CropGaurd](https://github.com/07Sada/CropGaurd.git)
47
+
48
+ ## Project PipeLine Stages
49
+ ![Project PipeLine Stages](https://user-images.githubusercontent.com/112761379/225940480-2a7381b2-6abd-4c1c-8287-0fd49099be8c.jpg)
50
+
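Note: a single prediction record built from the seven README features is shown below as a minimal sketch. The column names are assumed to follow the Kaggle CSV schema (`N`, `P`, `K`, `temperature`, `humidity`, `ph`, `rainfall`) rather than the capitalized labels in the README table, and the values are purely illustrative.

```python
import pandas as pd

# One hypothetical input row for the crop-recommendation model.
sample = pd.DataFrame([{
    "N": 90,                # nitrogen ratio in the soil
    "P": 42,                # phosphorus ratio in the soil
    "K": 43,                # potassium ratio in the soil
    "temperature": 20.9,    # degrees Celsius
    "humidity": 82.0,       # relative humidity in percent
    "ph": 6.5,              # soil pH
    "rainfall": 202.9,      # rainfall in millimeters
}])
print(sample)
```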
crop-recommendation/data_download.py ADDED
@@ -0,0 +1,40 @@
1
+ import opendatasets as od
2
+ import os
3
+ import json
4
+ from dotenv import load_dotenv
5
+
6
+ # Load variables from .env file
7
+ load_dotenv()
8
+
9
+ DATASET_URL = "https://www.kaggle.com/datasets/atharvaingle/crop-recommendation-dataset"
10
+
11
+ def create_kaggle_json_file():
12
+ # Fetch the username and API key from the .env file
13
+ username = os.getenv('username')
14
+ key = os.getenv('key')
15
+
16
+ kaggle_credentials = {
17
+ "username": username,
18
+ "key": key
19
+ }
20
+
21
+ # Path to the kaggle.json file
22
+ kaggle_file_path = os.path.join(os.getcwd(), 'kaggle.json')
23
+
24
+ # Write the dictionary to the .kaggle/kaggle.json file
25
+ with open(kaggle_file_path, 'w') as file:
26
+ json.dump(kaggle_credentials, file)
27
+
28
+ def remove_kaggle_json_file():
29
+ # Path to the kaggle.json file
30
+ kaggle_file_path = os.path.join(os.getcwd(), 'kaggle.json')
31
+
32
+ # Remove the kaggle.json file
33
+ os.remove(kaggle_file_path)
34
+
35
+ create_kaggle_json_file()
36
+
37
+ od.download(DATASET_URL)
38
+
39
+ # Remove the kaggle.json file after downloading the dataset
40
+ remove_kaggle_json_file()
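Note: data_download.py builds a temporary kaggle.json from two environment variables; the variable names `username` and `key` come directly from the `os.getenv` calls above, while the values below are placeholders. A minimal sanity-check sketch before downloading:

```python
# Expected .env contents (placeholder values, never committed):
#   username=your_kaggle_username
#   key=your_kaggle_api_key
import os
from dotenv import load_dotenv

load_dotenv()
assert os.getenv("username") and os.getenv("key"), "Kaggle credentials missing from .env"
```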
crop-recommendation/main.py ADDED
@@ -0,0 +1,8 @@
1
+ from src.pipeline.training_pipeline import start_training_pipeline
2
+
3
+ if __name__ =="__main__":
4
+ try:
5
+ start_training_pipeline()
6
+
7
+ except Exception as e:
8
+ print(e)
crop-recommendation/notebooks/crop-recommendation-notebook.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
crop-recommendation/requirements.txt ADDED
@@ -0,0 +1,11 @@
1
+ pymongo
2
+ pandas
3
+ numpy
4
+ matplotlib
5
+ seaborn
6
+ scikit-learn
7
+ opendatasets
8
+ python-dotenv
9
+ ipykernel
10
+ PyYAML
11
+ dill
crop-recommendation/saved_models/0/model/model.pkl ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61347ed5e6bbb2060eddc5a515c43e9d61aae5f6f1c7eaecb1f52b64f2df89a5
3
+ size 3676666
crop-recommendation/saved_models/0/target_encoder/target_encoder.pkl ADDED
Binary file (499 Bytes).
 
crop-recommendation/saved_models/0/transformer/transformer.pkl ADDED
Binary file (901 Bytes).
 
crop-recommendation/src/__init__.py ADDED
File without changes
crop-recommendation/src/components/__init__.py ADDED
File without changes
crop-recommendation/src/components/data_ingestion.py ADDED
@@ -0,0 +1,73 @@
1
+ from src.entity import config_entity
2
+ from src.entity import artifact_entity
3
+ from src.exception import CropException
4
+ from src.logger import logging
5
+ from src import utils
6
+
7
+ from sklearn.model_selection import train_test_split
8
+ import numpy as np
9
+ import pandas as pd
10
+ import sys
11
+ import os
12
+
13
+
14
+ class DataIngestion:
15
+ def __init__(self, data_ingestion_config: config_entity.DataIngestionConfig):
16
+ try:
17
+ logging.info(f"{'>>'*20} Data Ingestion {'<<'*20}")
18
+ self.data_ingestion_config = data_ingestion_config
19
+ except Exception as e:
20
+ raise CropException(e, sys)
21
+
22
+ def initiate_data_ingestion(self) -> artifact_entity.DataIngestionArtifact:
23
+ try:
24
+ logging.info("Exporting collection data as pandas dataframe")
25
+
26
+ df: pd.DataFrame = utils.get_collection_as_dataframe(
27
+ database_name=self.data_ingestion_config.database_name,
28
+ collection_name=self.data_ingestion_config.collection_name,
29
+ )
30
+
31
+ logging.info("Saving data in feature store")
32
+
33
+ feature_store_dir = os.path.dirname(self.data_ingestion_config.feature_store_file_path)
34
+ os.makedirs(feature_store_dir, exist_ok=True)
35
+
36
+ logging.info("Saving dataframe into feature store")
37
+ df.to_csv(
38
+ path_or_buf=self.data_ingestion_config.feature_store_file_path,
39
+ index=False,
40
+ header=True,
41
+ )
42
+
43
+ logging.info("Splitting dataset into train and test sets")
44
+ train_df, test_df = train_test_split(
45
+ df, test_size=self.data_ingestion_config.test_size, random_state=42
46
+ )
47
+
48
+ logging.info("create dataset directory folder if not available")
49
+ dataset_dir = os.path.dirname(self.data_ingestion_config.train_file_path)
50
+ os.makedirs(dataset_dir, exist_ok=True)
51
+
52
+ logging.info("Saving train and test dataframes to dataset folder")
53
+ train_df.to_csv(
54
+ path_or_buf=self.data_ingestion_config.train_file_path,
55
+ index=False,
56
+ header=True,
57
+ )
58
+ test_df.to_csv(
59
+ path_or_buf=self.data_ingestion_config.test_file_path,
60
+ index=False,
61
+ header=True,
62
+ )
63
+
64
+ data_ingestion_artifact = artifact_entity.DataIngestionArtifact(
65
+ feature_store_file_path=self.data_ingestion_config.feature_store_file_path,
66
+ train_file_path=self.data_ingestion_config.train_file_path,
67
+ test_file_path=self.data_ingestion_config.test_file_path,
68
+ )
69
+ logging.info(f"Data ingestion artifact: {data_ingestion_artifact}")
70
+ return data_ingestion_artifact
71
+
72
+ except Exception as e:
73
+ raise CropException(error_message=e, error_detail=sys)
crop-recommendation/src/components/data_trasformation.py ADDED
@@ -0,0 +1,113 @@
1
+ from src.entity import artifact_entity
2
+ from src.entity import config_entity
3
+ from src.logger import logging
4
+ from src.exception import CropException
5
+ from src import utils
6
+ from src.config import TARGET_COLUMN
7
+
8
+ from typing import Optional
9
+ import os
10
+ import sys
11
+
12
+ from sklearn.pipeline import Pipeline
13
+ from sklearn.preprocessing import LabelEncoder
14
+ from sklearn.preprocessing import StandardScaler
15
+ import pandas as pd
16
+ import numpy as np
17
+
18
+
19
+ class DataTransformation:
20
+ def __init__(
21
+ self,
22
+ data_transformation_config: config_entity.DataTransformationConfig,
23
+ data_ingestion_artifact: artifact_entity.DataIngestionArtifact,
24
+ ):
25
+ try:
26
+ logging.info(f"{'>'*20} Data Transformation Initiated {'<'*20}")
27
+ self.data_transformation_config = data_transformation_config
28
+ self.data_ingestion_artifact = data_ingestion_artifact
29
+
30
+ except Exception as e:
31
+ raise CropException(e, sys)
32
+
33
+ @classmethod
34
+ def get_data_tranformer_object(cls) -> Pipeline:
35
+ try:
36
+ standard_scaler = StandardScaler()
37
+
38
+ pipeline = Pipeline(steps=[("StandardScaler", standard_scaler)])
39
+
40
+ return pipeline
41
+
42
+ except Exception as e:
43
+ raise CropException(e, sys)
44
+
45
+ def initiate_data_transformation(
46
+ self,
47
+ ) -> artifact_entity.DataTransformationArtifact:
48
+ try:
49
+ # reading training and testing file
50
+ train_df = pd.read_csv(self.data_ingestion_artifact.train_file_path)
51
+ test_df = pd.read_csv(self.data_ingestion_artifact.test_file_path)
52
+
53
+ # selecting input features for train and test dataframe
54
+ input_feature_train_df = train_df.drop(TARGET_COLUMN, axis=1)
55
+ input_feature_test_df = test_df.drop(TARGET_COLUMN, axis=1)
56
+
57
+ # selecting target feature for train and test dataframe
58
+ target_feature_train_df = train_df[TARGET_COLUMN]
59
+ target_feature_test_df = test_df[TARGET_COLUMN]
60
+
61
+ label_encoder = LabelEncoder()
62
+ label_encoder.fit(target_feature_train_df)
63
+
64
+ # transformation on target column
65
+ target_feature_train_arr = label_encoder.transform(target_feature_train_df)
66
+ target_feature_test_arr = label_encoder.transform(target_feature_test_df)
67
+
68
+ # transforming input features
69
+ transformation_pipeline = DataTransformation.get_data_tranformer_object()
70
+ transformation_pipeline.fit(input_feature_train_df)
71
+
72
+ input_feature_train_arr = transformation_pipeline.transform(
73
+ input_feature_train_df
74
+ )
75
+ input_feature_test_arr = transformation_pipeline.transform(
76
+ input_feature_test_df
77
+ )
78
+
79
+ train_arr = np.c_[input_feature_train_arr, target_feature_train_arr]
80
+ test_arr = np.c_[input_feature_test_arr, target_feature_test_arr]
81
+
82
+ # save the numpy array
83
+ utils.save_object(
84
+ file_path=self.data_transformation_config.transformed_train_path,
85
+ obj=train_arr,
86
+ )
87
+ utils.save_object(
88
+ file_path=self.data_transformation_config.transformed_test_path,
89
+ obj=test_arr,
90
+ )
91
+
92
+ utils.save_object(
93
+ file_path=self.data_transformation_config.transform_object_path,
94
+ obj=transformation_pipeline,
95
+ )
96
+
97
+ utils.save_object(
98
+ file_path=self.data_transformation_config.target_encoder_path,
99
+ obj=label_encoder,
100
+ )
101
+
102
+ data_transformation_artifact = artifact_entity.DataTransformationArtifact(
103
+ transform_object_path=self.data_transformation_config.transform_object_path,
104
+ transformed_train_path=self.data_transformation_config.transformed_train_path,
105
+ transformed_test_path=self.data_transformation_config.transformed_test_path,
106
+ target_encoder_path=self.data_transformation_config.target_encoder_path,
107
+ )
108
+
109
+ logging.info(f"Data transformation object : {data_transformation_artifact}")
110
+ return data_transformation_artifact
111
+
112
+ except Exception as e:
113
+ raise CropException(e, sys)
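Note: a self-contained sketch of what initiate_data_transformation produces, using toy data in place of the ingested CSVs; the feature columns here are illustrative, not read from the pipeline config.

```python
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Toy stand-in for the ingested train dataframe.
train_df = pd.DataFrame({
    "N": [90, 20, 60], "P": [42, 30, 55], "K": [43, 20, 44],
    "label": ["rice", "maize", "rice"],
})

input_df = train_df.drop("label", axis=1)
target_df = train_df["label"]

# The same two fitted objects the component later persists with utils.save_object.
pipeline = Pipeline(steps=[("StandardScaler", StandardScaler())])
encoder = LabelEncoder()

input_arr = pipeline.fit_transform(input_df)
target_arr = encoder.fit_transform(target_df)

# np.c_ appends the encoded target as the last column, which is exactly the
# layout model_trainer later splits with arr[:, :-1] / arr[:, -1].
train_arr = np.c_[input_arr, target_arr]
print(train_arr.shape)  # (3, 4)
```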
crop-recommendation/src/components/data_validation.py ADDED
@@ -0,0 +1,159 @@
1
+ from src.entity import artifact_entity
2
+ from src.entity import config_entity
3
+ from src.logger import logging
4
+ from src.exception import CropException
5
+ from src.config import TARGET_COLUMN
6
+ from src import utils
7
+
8
+ from typing import Optional
9
+ from scipy.stats import ks_2samp
10
+ import pandas as pd
11
+ import numpy as np
12
+ import sys
13
+ import os
14
+
15
+
16
+ class DataValidation:
17
+ def __init__(
18
+ self,
19
+ data_validation_config: config_entity.DataValidationConfig,
20
+ data_ingestion_artifact: artifact_entity.DataIngestionArtifact,
21
+ ):
22
+ try:
23
+ logging.info(f"{'>'*20} Data Validation Initiated {'<'*20}")
24
+ self.data_validation_config = data_validation_config
25
+ self.data_ingestion_artifact = data_ingestion_artifact
26
+ self.validation_error = dict()
27
+ except Exception as e:
28
+ raise CropException(e, sys)
29
+
30
+ def is_required_columns_exists(
31
+ self, base_df: pd.DataFrame, current_df: pd.DataFrame, report_key_name: str
32
+ ) -> bool:
33
+ try:
34
+ base_columns = base_df.columns
35
+ current_columns = current_df.columns
36
+
37
+ missing_columns = []
38
+ for base_column in base_columns:
39
+ if base_column not in current_columns:
40
+ logging.info(f"Column: {base_column} is not available")
41
+ missing_columns.append(base_column)
42
+
43
+ if len(missing_columns) > 0:
44
+ self.validation_error[report_key_name] = missing_columns
45
+ return False
46
+
47
+ return True
48
+
49
+ except Exception as e:
50
+ raise CropException(e, sys)
51
+
52
+ def data_drift(
53
+ self, base_df: pd.DataFrame, current_df: pd.DataFrame, report_key_name: str
54
+ ):
55
+ try:
56
+ drift_report = dict()
57
+
58
+ base_columns = base_df.columns
59
+ current_columns = current_df.columns
60
+
61
+ for base_column in base_columns:
62
+ base_data, current_data = base_df[base_column], current_df[base_column]
63
+
64
+ # Null hypothesis: both columns' data are drawn from the same distribution
65
+
66
+ logging.info(
67
+ f"Hypothesis {base_column} : {base_data.dtype}, {current_data.dtype}"
68
+ )
69
+ same_distribution = ks_2samp(base_data, current_data)
70
+
71
+ if same_distribution.pvalue > 0.05:
72
+ # we are accepting the null hypothesis
73
+ drift_report[base_column] = {
74
+ "pvalue": float(same_distribution.pvalue),
75
+ "same_distribution": True,
76
+ }
77
+
78
+ else:
79
+ drift_report[base_column] = {
80
+ "pvalue": float(same_distribution.pvalue),
81
+ "same_distribution": False,
82
+ }
83
+
84
+ self.validation_error[report_key_name] = drift_report
85
+
86
+ except Exception as e:
87
+ raise CropException(e, sys)
88
+
89
+ def initiate_data_validation(self) -> artifact_entity.DataValidationArtifact:
90
+ try:
91
+ logging.info(f"Reading base dataframe")
92
+ base_df = pd.read_csv(self.data_validation_config.base_file_path)
93
+
94
+ logging.info(f"Reading train dataframe")
95
+ train_df = pd.read_csv(self.data_ingestion_artifact.train_file_path)
96
+
97
+ logging.info(f"Reading test dataframe")
98
+ test_df = pd.read_csv(self.data_ingestion_artifact.test_file_path)
99
+
100
+ exclude_column = [TARGET_COLUMN]
101
+ base_df = utils.seperate_dependant_column(
102
+ df=base_df, exclude_column=exclude_column
103
+ )
104
+ train_df = utils.seperate_dependant_column(
105
+ df=train_df, exclude_column=exclude_column
106
+ )
107
+ test_df = utils.seperate_dependant_column(
108
+ df=test_df, exclude_column=exclude_column
109
+ )
110
+
111
+ logging.info(f"Checking whether all required columns are present in train_df")
112
+ train_df_columns_status = self.is_required_columns_exists(
113
+ base_df=base_df,
114
+ current_df=train_df,
115
+ report_key_name="missing_columns_within_train_dataset",
116
+ )
117
+
118
+ test_df_columns_status = self.is_required_columns_exists(
119
+ base_df=base_df,
120
+ current_df=test_df,
121
+ report_key_name="missing_columns_within_test_dataset",
122
+ )
123
+
124
+ if train_df_columns_status:
125
+ logging.info(
126
+ f"All required columns are available in train df, hence detecting data drift"
127
+ )
128
+ self.data_drift(
129
+ base_df=base_df,
130
+ current_df=train_df,
131
+ report_key_name="data_drift_within_train_dataset",
132
+ )
133
+
134
+ if test_df_columns_status:
135
+ logging.info(
136
+ f"All required columns are available in test df, hence detecting data drift"
137
+ )
138
+ self.data_drift(
139
+ base_df=base_df,
140
+ current_df=test_df,
141
+ report_key_name="data_drift_within_test_dataset",
142
+ )
143
+
144
+ # writing the report
145
+ logging.info("Writing report in yaml format")
146
+ utils.write_yaml_file(
147
+ file_path=self.data_validation_config.report_file_path,
148
+ data=self.validation_error,
149
+ )
150
+
151
+ data_validation_artifact = artifact_entity.DataValidationArtifact(
152
+ report_file_path=self.data_validation_config.report_file_path
153
+ )
154
+ logging.info(f"Data validation artifact: {data_validation_artifact}")
155
+
156
+ return data_validation_artifact
157
+
158
+ except Exception as e:
159
+ raise CropException(e, sys)
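Note: the drift check above relies on the two-sample Kolmogorov-Smirnov test; the fragment below shows the same accept/reject rule on synthetic columns (the data is made up for illustration).

```python
import numpy as np
from scipy.stats import ks_2samp

rng = np.random.default_rng(42)
base_col = rng.normal(loc=0.0, scale=1.0, size=500)      # "base" distribution
current_col = rng.normal(loc=0.5, scale=1.0, size=500)   # shifted "current" data

result = ks_2samp(base_col, current_col)

# Same rule as DataValidation.data_drift: a p-value above 0.05 keeps the null
# hypothesis that both samples come from the same distribution.
same_distribution = result.pvalue > 0.05
print(f"pvalue={result.pvalue:.4f}, same_distribution={same_distribution}")
```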
crop-recommendation/src/components/model_evaluation.py ADDED
@@ -0,0 +1,123 @@
1
+ from src.predictor import ModelResolver
2
+ from src.entity import config_entity
3
+ from src.entity import artifact_entity
4
+ from src.logger import logging
5
+ from src.exception import CropException
6
+ from src.config import TARGET_COLUMN
7
+ from src.utils import load_object
8
+
9
+ from sklearn.metrics import f1_score
10
+ import pandas as pd
11
+ import numpy as np
12
+ import os
13
+ import sys
14
+
15
+
16
+ class ModelEvaluation:
17
+ def __init__(
18
+ self,
19
+ model_eval_config: config_entity.ModelEvaluationConfig,
20
+ data_ingesiton_artifact: artifact_entity.DataIngestionArtifact,
21
+ data_transformation_artifact: artifact_entity.DataTransformationArtifact,
22
+ model_trainer_artifact: artifact_entity.ModelTrainerArtifact,
23
+ ):
24
+ try:
25
+ logging.info(f"{'>'*20} Model Evaluation Initiated {'<'*20}")
26
+ self.model_eval_config = model_eval_config
27
+ self.data_ingesiton_artifact = data_ingesiton_artifact
28
+ self.data_transformation_artifact = data_transformation_artifact
29
+ self.model_trainer_artifact = model_trainer_artifact
30
+ self.model_resolver = ModelResolver()
31
+
32
+ except Exception as e:
33
+ raise CropException(e, sys)
34
+
35
+ def initiate_model_evaluation(self) -> artifact_entity.ModelEvaluationArtifact:
36
+ try:
37
+ logging.info(
38
+ f"If the saved model directory contains a model, we will compare which model is best trained: \
39
+ the model from the saved model folder or the new model."
40
+ )
41
+
42
+ latest_dir_path = self.model_resolver.get_latest_dir_path()
43
+ if latest_dir_path == None:
44
+ model_eval_artifact = artifact_entity.ModelEvaluationArtifact(
45
+ is_model_accepted=True, improved_accuracy=None
46
+ )
47
+ logging.info(f"Model evaluation artifact: {model_eval_artifact}")
48
+ return model_eval_artifact
49
+
50
+ # finding location of transformed model, and target encoder
51
+ logging.info(f"Finding location of transformer model and target encoder")
52
+ transformer_path = self.model_resolver.get_latest_transformer_path()
53
+
54
+ model_path = self.model_resolver.get_latest_model_path()
55
+
56
+ target_encoder_path = self.model_resolver.get_latest_target_encoder_path()
57
+
58
+ logging.info(
59
+ f"Previous trained objects of transformer, model and target encoder"
60
+ )
61
+ # previous trained objects
62
+ transformer = load_object(file_path=transformer_path)
63
+ model = load_object(file_path=model_path)
64
+ target_encoder = load_object(file_path=target_encoder_path)
65
+
66
+ logging.info(f"Currently trained model objects")
67
+ # currently trained model objects
68
+ current_transformer = load_object(
69
+ file_path=self.data_transformation_artifact.transform_object_path
70
+ )
71
+ current_model = load_object(
72
+ file_path=self.model_trainer_artifact.model_path
73
+ )
74
+ current_target_encoder = load_object(
75
+ file_path=self.data_transformation_artifact.target_encoder_path
76
+ )
77
+
78
+ test_df = pd.read_csv(self.data_ingesiton_artifact.test_file_path)
79
+ target_df = test_df[TARGET_COLUMN]
80
+
81
+ y_true = target_encoder.transform(target_df)
82
+
83
+ # accuracy using previous trained model
84
+
85
+ input_feature_name = list(transformer.feature_names_in_)
86
+ input_arr = transformer.transform(test_df[input_feature_name])
87
+
88
+ y_pred = model.predict(input_arr)
89
+ y_true = target_encoder.transform(target_df)
90
+
91
+
92
+ previous_model_score = f1_score(
93
+ y_true=y_true, y_pred=y_pred, average="weighted"
94
+ )
95
+
96
+ # accuracy using current model
97
+ input_feature_name = list(current_transformer.feature_names_in_)
98
+ input_arr = current_transformer.transform(test_df[input_feature_name])
99
+
100
+ y_pred = current_model.predict(input_arr)
101
+ y_true = current_target_encoder.transform(target_df)
102
+
103
+
104
+ current_model_score = f1_score(
105
+ y_true=y_true, y_pred=y_pred, average="weighted"
106
+ )
107
+
108
+ logging.info(f"Accuracy using current trained model: {current_model_score}")
109
+
110
+ if current_model_score <= previous_model_score:
111
+ logging.info(f"Current trained model is not better than previous model")
112
+ raise Exception("Current trained model is not better than previous model")
113
+
114
+ model_eval_artifact = artifact_entity.ModelEvaluationArtifact(
115
+ is_model_accepted=True,
116
+ improved_accuracy=current_model_score - previous_model_score,
117
+ )
118
+ logging.info(f"Model Eval artifacts: {model_eval_artifact}")
119
+
120
+ return model_eval_artifact
121
+
122
+ except Exception as e:
123
+ raise CropException(e, sys)
crop-recommendation/src/components/model_pusher.py ADDED
@@ -0,0 +1,69 @@
1
+ from src.entity.config_entity import ModelPusherConfig
2
+ from src.entity import artifact_entity
3
+ from src.predictor import ModelResolver
4
+ from src.exception import CropException
5
+ from src.logger import logging
6
+ from src.utils import load_object, save_object
7
+ from src.entity.artifact_entity import (
8
+ DataTransformationArtifact,
9
+ ModelTrainerArtifact,
10
+ ModelPusherArtifact,
11
+ )
12
+ import sys
13
+ import os
14
+
15
+
16
+ class ModelPusher:
17
+ def __init__(
18
+ self,
19
+ model_pusher_config: ModelPusherConfig,
20
+ data_transformation_artifact: DataTransformationArtifact,
21
+ model_trainer_artifact: ModelTrainerArtifact,
22
+ ):
23
+ try:
24
+ logging.info(f"{'>'*20} Model Pusher Initiated {'<'*30}")
25
+ self.model_pusher_config = model_pusher_config
26
+ self.data_transformation_artifact = data_transformation_artifact
27
+ self.model_trainer_artifact = model_trainer_artifact
28
+ self.model_resolver = ModelResolver(
29
+ model_registry=self.model_pusher_config.saved_model_dir
30
+ )
31
+ except Exception as e:
32
+ raise CropException(e, sys)
33
+
34
+ def initiate_model_pusher(self) -> ModelPusherArtifact:
35
+ try:
36
+ # load object
37
+ logging.info(f"Loading transformer model and target encoder")
38
+ transformer = load_object(file_path=self.data_transformation_artifact.transform_object_path)
39
+ model = load_object(file_path=self.model_trainer_artifact.model_path)
40
+ target_encoder = load_object(file_path=self.data_transformation_artifact.target_encoder_path)
41
+
42
+ # model pusher dir
43
+ logging.info(f"Saving model into model pusher directory")
44
+ save_object(file_path=self.model_pusher_config.pusher_transformer_path,obj=transformer)
45
+ save_object(file_path=self.model_pusher_config.pusher_model_path, obj=model)
46
+ save_object(file_path=self.model_pusher_config.pusher_target_encoder_path, obj=target_encoder)
47
+
48
+
49
+ # saved model dir
50
+ logging.info(f"Saving model in saved model dir")
51
+
52
+ transformer_path = self.model_resolver.get_latest_save_transformer_path()
53
+ model_path = self.model_resolver.get_latest_save_model_path()
54
+ target_encoder_path = self.model_resolver.get_latest_save_target_encoder_path()
55
+
56
+ save_object(file_path=transformer_path, obj=transformer)
57
+ save_object(file_path=model_path, obj=model)
58
+ save_object(file_path=target_encoder_path, obj=target_encoder)
59
+
60
+ model_pusher_artifact = ModelPusherArtifact(
61
+ pusher_model_dir=self.model_pusher_config.pusher_model_dir,
62
+ saved_model_dir=self.model_pusher_config.saved_model_dir,
63
+ )
64
+ logging.info(f"Model Pusher artifact: {model_pusher_artifact}")
65
+
66
+ return model_pusher_artifact
67
+
68
+ except Exception as e:
69
+ raise CropException(e, sys)
crop-recommendation/src/components/model_trainer.py ADDED
@@ -0,0 +1,107 @@
1
+ from src.entity import config_entity
2
+ from src.entity import artifact_entity
3
+ from src.logger import logging
4
+ from src.exception import CropException
5
+ from src import utils
6
+
7
+ from typing import Optional
8
+ from sklearn.metrics import f1_score
9
+ from sklearn.ensemble import RandomForestClassifier
10
+ import os
11
+ import sys
12
+
13
+
14
+ class ModelTrainer:
15
+ def __init__(
16
+ self,
17
+ model_trainer_config: config_entity.ModelTrainerConfig,
18
+ data_transformation_artifact: artifact_entity.DataTransformationArtifact,
19
+ ):
20
+ try:
21
+ logging.info(f"{'>'*30} Model Trainer Initiated {'<'*30}")
22
+ self.model_trainer_config = model_trainer_config
23
+ self.data_transformation_artifact = data_transformation_artifact
24
+
25
+ except Exception as e:
26
+ raise CropException(e, sys)
27
+
28
+ def train_model(self, X, y):
29
+ try:
30
+ random_forest = RandomForestClassifier()
31
+ random_forest.fit(X, y)
32
+
33
+ return random_forest
34
+
35
+ except Exception as e:
36
+ raise CropException(e, sys)
37
+
38
+ def initiate_model_trainer(self) -> artifact_entity.ModelTrainerArtifact:
39
+ try:
40
+ logging.info(f"Loading train and test array")
41
+ train_arr = utils.load_numpy_array_data(
42
+ file_path=self.data_transformation_artifact.transformed_train_path
43
+ )
44
+ test_arr = utils.load_numpy_array_data(
45
+ file_path=self.data_transformation_artifact.transformed_test_path
46
+ )
47
+
48
+ logging.info(
49
+ f"Splitting input and target feature from both train and test arr. "
50
+ )
51
+ X_train, y_train = train_arr[:, :-1], train_arr[:, -1]
52
+ X_test, y_test = test_arr[:, :-1], test_arr[:, -1]
53
+
54
+ logging.info(f"Training the model")
55
+ model = self.train_model(X=X_train, y=y_train)
56
+
57
+ logging.info(f"Calculating f1 train score")
58
+ yhat_train = model.predict(X_train)
59
+ f1_train_score = f1_score(
60
+ y_true=y_train, y_pred=yhat_train, average="weighted"
61
+ )
62
+
63
+ logging.info(f"Calculating f1 test score")
64
+ yhat_test = model.predict(X_test)
65
+ f1_test_score = f1_score(
66
+ y_true=y_test, y_pred=yhat_test, average="weighted"
67
+ )
68
+
69
+ logging.info(
70
+ f"train_score: {f1_train_score} and test score: {f1_test_score}"
71
+ )
72
+
73
+ # checking for overfitting or underfitting or expected score
74
+ logging.info(f"Checking if our model is underfitting or not")
75
+ if f1_test_score < self.model_trainer_config.expected_score:
76
+ raise Exception(
77
+ f"Model is not good as it is not able to give \
78
+ expected accuracy: {self.model_trainer_config.expected_score}, model actual score: {f1_test_score}"
79
+ )
80
+
81
+ logging.info(f"Checking if our model is overfitting or not")
82
+ diff = abs(f1_train_score - f1_test_score)
83
+
84
+ if diff > self.model_trainer_config.overfitting_threshold:
85
+ raise Exception(
86
+ f"Train and test score diff: {diff} \
87
+ is more than overfitting threshold: {self.model_trainer_config.overfitting_threshold}"
88
+ )
89
+
90
+ # save the trained model
91
+ logging.info(f"Saving model object")
92
+ utils.save_object(file_path=self.model_trainer_config.model_path, obj=model)
93
+
94
+ # prepare artifact
95
+ logging.info(f"Prepare the artifact")
96
+ model_trainer_artifact = artifact_entity.ModelTrainerArtifact(
97
+ model_path=self.model_trainer_config.model_path,
98
+ f1_train_score=f1_train_score,
99
+ f1_test_score=f1_test_score,
100
+ )
101
+
102
+ logging.info(f"Model trainer artifact: {model_trainer_artifact}")
103
+
104
+ return model_trainer_artifact
105
+
106
+ except Exception as e:
107
+ raise CropException(e, sys)
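Note: ModelTrainer gates the run on a weighted F1 score and on the train/test gap. The sketch below mirrors those checks with invented label arrays; 0.9 and 0.1 are the expected_score and overfitting_threshold values from ModelTrainerConfig.

```python
from sklearn.metrics import f1_score

# Illustrative encoded labels; real values come from the transformed arrays.
y_train_true = [0, 1, 2, 1, 0, 2]
y_train_pred = [0, 1, 2, 1, 0, 2]                 # perfect fit on train
y_test_true = [0, 0, 0, 0, 1, 1, 1, 2, 2, 2]
y_test_pred = [0, 0, 0, 1, 1, 1, 1, 2, 2, 2]      # one mistake on test

f1_train = f1_score(y_train_true, y_train_pred, average="weighted")
f1_test = f1_score(y_test_true, y_test_pred, average="weighted")

# Same gates as initiate_model_trainer.
expected_score, overfitting_threshold = 0.9, 0.1
underfitting = f1_test < expected_score
overfitting = abs(f1_train - f1_test) > overfitting_threshold
print(f"f1_train={f1_train:.2f}, f1_test={f1_test:.2f}, "
      f"underfitting={underfitting}, overfitting={overfitting}")
```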
crop-recommendation/src/config.py ADDED
@@ -0,0 +1,20 @@
1
+ import pymongo
2
+ import pandas as pd
3
+ import json
4
+ from dataclasses import dataclass
5
+ import os
6
+ from dotenv import load_dotenv
7
+
8
+ load_dotenv()
9
+
10
+
11
+ @dataclass
12
+ class EnvironmentVariable:
13
+ mongo_db_url = os.getenv("MONGO_URL")
14
+
15
+
16
+ env = EnvironmentVariable()
17
+
18
+ mongo_client = pymongo.MongoClient(env.mongo_db_url)
19
+
20
+ TARGET_COLUMN = "label"
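Note: config.py reads MONGO_URL and opens the Mongo client at import time, so the variable must exist before anything imports src.config. A quick check, with a placeholder connection string:

```python
# Expected in .env (placeholder value):
#   MONGO_URL=mongodb+srv://<user>:<password>@<cluster>/<db>
import os
from dotenv import load_dotenv

load_dotenv()
assert os.getenv("MONGO_URL"), "MONGO_URL must be set before importing src.config"
```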
crop-recommendation/src/entity/__init__.py ADDED
File without changes
crop-recommendation/src/entity/artifact_entity.py ADDED
@@ -0,0 +1,40 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
+ @dataclass
5
+ class DataIngestionArtifact:
6
+ feature_store_file_path: str
7
+ train_file_path: str
8
+ test_file_path: str
9
+
10
+
11
+ @dataclass
12
+ class DataValidationArtifact:
13
+ report_file_path: str
14
+
15
+
16
+ @dataclass
17
+ class DataTransformationArtifact:
18
+ transform_object_path: str
19
+ transformed_train_path: str
20
+ transformed_test_path: str
21
+ target_encoder_path: str
22
+
23
+
24
+ @dataclass
25
+ class ModelTrainerArtifact:
26
+ model_path: str
27
+ f1_train_score: float
28
+ f1_test_score: float
29
+
30
+
31
+ @dataclass
32
+ class ModelEvaluationArtifact:
33
+ is_model_accepted: bool
34
+ improved_accuracy: float
35
+
36
+
37
+ @dataclass
38
+ class ModelPusherArtifact:
39
+ pusher_model_dir: str
40
+ saved_model_dir: str
crop-recommendation/src/entity/config_entity.py ADDED
@@ -0,0 +1,120 @@
1
+ import os
2
+ import sys
3
+ from src.exception import CropException
4
+ from src.logger import logging
5
+ from datetime import datetime
6
+
7
+ FILE_NAME = "crop.csv"
8
+ TRAIN_FILE_NAME = "train.csv"
9
+ TEST_FILE_NAME = "test.csv"
10
+ TRANSFORMER_OBJECT_FILE_NAME = "transformer.pkl"
11
+ TARGET_ENCODER_OBJECT_FILE_NAME = "target_encoder.pkl"
12
+ MODEL_FILE_NAME = "model.pkl"
13
+
14
+
15
+ class TrainingPipelineConfig:
16
+ def __init__(self):
17
+ try:
18
+ self.artifact_dir = os.path.join(
19
+ os.getcwd(), "artifact", f"{datetime.now().strftime('%m%d%Y__%H%M%S')}"
20
+ )
21
+ except Exception as e:
22
+ raise CropException(e, sys)
23
+
24
+
25
+ class DataIngestionConfig:
26
+ def __init__(self, training_pipeline_config: TrainingPipelineConfig):
27
+ try:
28
+ self.database_name = "smartcropguard"
29
+ self.collection_name = "crop"
30
+ self.data_ingestion_dir = os.path.join(
31
+ training_pipeline_config.artifact_dir, "data_ingestion"
32
+ )
33
+ self.feature_store_file_path = os.path.join(
34
+ self.data_ingestion_dir, "feature_store", FILE_NAME
35
+ )
36
+ self.train_file_path = os.path.join(
37
+ self.data_ingestion_dir, "dataset", TRAIN_FILE_NAME
38
+ )
39
+ self.test_file_path = os.path.join(
40
+ self.data_ingestion_dir, "dataset", TEST_FILE_NAME
41
+ )
42
+ self.test_size = 0.2
43
+ except Exception as e:
44
+ raise CropException(e, sys)
45
+
46
+ def to_dict(self) -> dict:
47
+ try:
48
+ return self.__dict__
49
+ except Exception as e:
50
+ raise CropException(e, sys)
51
+
52
+
53
+ class DataValidationConfig:
54
+ def __init__(self, training_pipeline_config: TrainingPipelineConfig):
55
+ self.data_validation_dir = os.path.join(
56
+ training_pipeline_config.artifact_dir, "data_validation"
57
+ )
58
+ self.report_file_path = os.path.join(self.data_validation_dir, "report.yaml")
59
+ self.missing_threshold = 0.2
60
+ self.base_file_path = os.path.join(
61
+ "crop-recommendation-dataset/Crop_recommendation.csv"
62
+ )
63
+
64
+
65
+ class DataTransformationConfig:
66
+ def __init__(self, training_pipeline_config: TrainingPipelineConfig):
67
+ self.data_transformation_dir = os.path.join(
68
+ training_pipeline_config.artifact_dir, "data_transformation"
69
+ )
70
+ self.transform_object_path = os.path.join(
71
+ self.data_transformation_dir,
72
+ "transformer",
73
+ TRANSFORMER_OBJECT_FILE_NAME
74
+ )
75
+ self.transformed_train_path = os.path.join(
76
+ self.data_transformation_dir,
77
+ "transformed",
78
+ TRAIN_FILE_NAME.replace("csv", "npz"),
79
+ )
80
+ self.transformed_test_path = os.path.join(
81
+ self.data_transformation_dir,
82
+ "transformed",
83
+ TEST_FILE_NAME.replace("csv", "npz"),
84
+ )
85
+ self.target_encoder_path = os.path.join(
86
+ self.data_transformation_dir,
87
+ "target_encoder",
88
+ TARGET_ENCODER_OBJECT_FILE_NAME,
89
+ )
90
+
91
+
92
+ class ModelTrainerConfig:
93
+ def __init__(self, training_pipeline_config: TrainingPipelineConfig):
94
+ self.model_trainer_dir = os.path.join(
95
+ training_pipeline_config.artifact_dir, "model_trainer"
96
+ )
97
+ self.model_path = os.path.join(self.model_trainer_dir, "model", MODEL_FILE_NAME)
98
+ self.expected_score = 0.9
99
+ self.overfitting_threshold = 0.1
100
+
101
+
102
+ class ModelEvaluationConfig:
103
+ def __init__(self, training_pipeline_config: TrainingPipelineConfig):
104
+ self.change_threshold = 0.01
105
+
106
+
107
+ class ModelPusherConfig:
108
+ def __init__(self, training_pipeline_config: TrainingPipelineConfig):
109
+ self.model_pusher_dir = os.path.join(
110
+ training_pipeline_config.artifact_dir, "model_pusher"
111
+ )
112
+ self.saved_model_dir = os.path.join("saved_models")
113
+ self.pusher_model_dir = os.path.join(self.model_pusher_dir, "saved_models")
114
+ self.pusher_model_path = os.path.join(self.pusher_model_dir, MODEL_FILE_NAME)
115
+ self.pusher_transformer_path = os.path.join(
116
+ self.pusher_model_dir, TRANSFORMER_OBJECT_FILE_NAME
117
+ )
118
+ self.pusher_target_encoder_path = os.path.join(
119
+ self.pusher_model_dir, TARGET_ENCODER_OBJECT_FILE_NAME
120
+ )
crop-recommendation/src/exception.py ADDED
@@ -0,0 +1,21 @@
1
+ import sys
2
+
3
+
4
+ def error_message_detail(error, error_detail: sys):
5
+ _, _, exc_tb = error_detail.exc_info()
6
+ file_name = exc_tb.tb_frame.f_code.co_filename
7
+ error_message = "Error occurred python script name [{0}] line number [{1}] error message [{2}]".format(
8
+ file_name, exc_tb.tb_lineno, str(error)
9
+ )
10
+
11
+ return error_message
12
+
13
+
14
+ class CropException(Exception):
15
+ def __init__(self, error_message, error_detail: sys):
16
+ self.error_message = error_message_detail(
17
+ error_message, error_detail=error_detail
18
+ )
19
+
20
+ def __str__(self):
21
+ return self.error_message
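Note: a small usage sketch of the exception wrapper, assuming the src package is importable from the repository root. CropException is always constructed with the original error plus the sys module, so error_message_detail can pull the file name and line number from the active traceback.

```python
import sys
from src.exception import CropException

def divide(a, b):
    try:
        return a / b
    except Exception as e:
        # Re-raise with file name and line number baked into the message.
        raise CropException(e, sys)

try:
    divide(1, 0)
except CropException as err:
    print(err)  # "Error occurred python script name [...] line number [...] ..."
```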
crop-recommendation/src/logger.py ADDED
@@ -0,0 +1,22 @@
1
+ import logging
2
+ import os
3
+ from datetime import datetime
4
+
5
+ # log file name
6
+ LOG_FILE_NAME = f"{datetime.now().strftime('%m%d%Y__%H%M%S')}.log"
7
+
8
+ # Log directory
9
+ LOG_FILE_DIR = os.path.join(os.getcwd(), "logs")
10
+
11
+ # create folder if not available
12
+ os.makedirs(LOG_FILE_DIR, exist_ok=True)
13
+
14
+ # Log file path
15
+ LOG_FILE_PATH = os.path.join(LOG_FILE_DIR, LOG_FILE_NAME)
16
+
17
+
18
+ logging.basicConfig(
19
+ filename=LOG_FILE_PATH,
20
+ format="[ %(asctime)s ] %(filename)s - %(lineno)d %(name)s - %(levelname)s - %(message)s",
21
+ level=logging.INFO,
22
+ )
crop-recommendation/src/pipeline/__init__.py ADDED
File without changes
crop-recommendation/src/pipeline/training_pipeline.py ADDED
@@ -0,0 +1,95 @@
1
+ from src.logger import logging
2
+ from src.exception import CropException
3
+ from src.utils import get_collection_as_dataframe
4
+ from src.entity import config_entity
5
+ from src.entity import artifact_entity
6
+ import sys
7
+ from src.components.data_ingestion import DataIngestion
8
+ from src.components.data_validation import DataValidation
9
+ from src.components.data_trasformation import DataTransformation
10
+ from src.components.model_trainer import ModelTrainer
11
+ from src.components.model_evaluation import ModelEvaluation
12
+ from src.components.model_pusher import ModelPusher
13
+
14
+
15
+ def start_training_pipeline():
16
+ try:
17
+ training_pipeline_config = config_entity.TrainingPipelineConfig()
18
+
19
+ # data ingestion
20
+ data_ingestion_config = config_entity.DataIngestionConfig(
21
+ training_pipeline_config=training_pipeline_config
22
+ )
23
+ data_ingestion_config.to_dict()
24
+
25
+ data_ingestion = DataIngestion(data_ingestion_config=data_ingestion_config)
26
+ data_ingestion_artifact = data_ingestion.initiate_data_ingestion()
27
+
28
+ print(f"Data Ingestion complete")
29
+
30
+ # data validation
31
+ data_validation_config = config_entity.DataValidationConfig(
32
+ training_pipeline_config=training_pipeline_config
33
+ )
34
+
35
+ data_validation = DataValidation(
36
+ data_validation_config=data_validation_config,
37
+ data_ingestion_artifact=data_ingestion_artifact,
38
+ )
39
+
40
+ data_validation.initiate_data_validation()
41
+ print(f"Data Validation Complete")
42
+
43
+ # data transformation
44
+ data_transformation_config = config_entity.DataTransformationConfig(
45
+ training_pipeline_config=training_pipeline_config
46
+ )
47
+
48
+ data_transformation = DataTransformation(
49
+ data_transformation_config=data_transformation_config,
50
+ data_ingestion_artifact=data_ingestion_artifact,
51
+ )
52
+
53
+ data_transformation_artifact = (
54
+ data_transformation.initiate_data_transformation()
55
+ )
56
+ print(f"Data Transformation Complete")
57
+
58
+ # model trainer
59
+ model_trainer_config = config_entity.ModelTrainerConfig(
60
+ training_pipeline_config=training_pipeline_config
61
+ )
62
+
63
+ model_trainer = ModelTrainer(
64
+ model_trainer_config=model_trainer_config,
65
+ data_transformation_artifact=data_transformation_artifact,
66
+ )
67
+
68
+ model_trainer_artifact = model_trainer.initiate_model_trainer()
69
+ print(f"Model Training Complete")
70
+
71
+ # model evaluation
72
+ model_eval_config = config_entity.ModelEvaluationConfig(
73
+ training_pipeline_config=training_pipeline_config
74
+ )
75
+ model_eval = ModelEvaluation(
76
+ model_eval_config=model_eval_config,
77
+ data_ingesiton_artifact=data_ingestion_artifact,
78
+ data_transformation_artifact=data_transformation_artifact,
79
+ model_trainer_artifact=model_trainer_artifact,
80
+ )
81
+ model_eval_artifact = model_eval.initiate_model_evaluation()
82
+ print(f"Model Evaluation Complete")
83
+
84
+ # Model Pusher
85
+ model_pusher_config = config_entity.ModelPusherConfig(training_pipeline_config=training_pipeline_config)
86
+
87
+ model_pusher = ModelPusher(model_pusher_config=model_pusher_config,
88
+ data_transformation_artifact=data_transformation_artifact,
89
+ model_trainer_artifact=model_trainer_artifact)
90
+
91
+ model_pusher_artifact = model_pusher.initiate_model_pusher()
92
+ print(f"Model Pusher Complete")
93
+
94
+ except Exception as e:
95
+ print(e)
crop-recommendation/src/predictor.py ADDED
@@ -0,0 +1,100 @@
1
+ from src.entity.config_entity import TRANSFORMER_OBJECT_FILE_NAME
2
+ from src.entity.config_entity import MODEL_FILE_NAME
3
+ from src.entity.config_entity import TARGET_ENCODER_OBJECT_FILE_NAME
4
+ from src.exception import CropException
5
+ from src.logger import logging
6
+
7
+ import os
8
+ import sys
9
+
10
+ from glob import glob
11
+ from typing import Optional
12
+
13
+
14
+ class ModelResolver:
15
+ def __init__(
16
+ self,
17
+ model_registry: str = "saved_models",
18
+ transformer_dir_name="transformer",
19
+ target_encoder_dir_name="target_encoder",
20
+ model_dir_name="model",
21
+ ):
22
+ self.model_registry = model_registry
23
+ os.makedirs(self.model_registry, exist_ok=True)
24
+
25
+ self.transformer_dir_name = transformer_dir_name
26
+ self.target_encoder_dir_name = target_encoder_dir_name
27
+ self.model_dir_name = model_dir_name
28
+
29
+ def get_latest_dir_path(self) -> Optional[str]:
30
+ try:
31
+ dir_names = os.listdir(self.model_registry)
32
+ if len(dir_names) == 0:
33
+ return None
34
+ dir_names = list(map(int, dir_names))
35
+ latest_dir_name = max(dir_names)
36
+ return os.path.join(self.model_registry, f"{latest_dir_name}")
37
+
38
+ except Exception as e:
39
+ raise CropException(e, sys)
40
+
41
+ def get_latest_model_path(self):
42
+ try:
43
+ latest_dir = self.get_latest_dir_path()
44
+ if latest_dir is None:
45
+ raise Exception(f"Model is not available")
46
+ return os.path.join(latest_dir, self.model_dir_name, MODEL_FILE_NAME)
47
+ except Exception as e:
48
+ raise CropException(e, sys)
49
+
50
+ def get_latest_transformer_path(self):
51
+ try:
52
+ latest_dir = self.get_latest_dir_path()
53
+ if latest_dir is None:
54
+ raise Exception(f"Transformer is not available")
55
+ return os.path.join(latest_dir, self.transformer_dir_name, TRANSFORMER_OBJECT_FILE_NAME)
56
+ except Exception as e:
57
+ raise CropException(e, sys)
58
+
59
+ def get_latest_target_encoder_path(self):
60
+ try:
61
+ latest_dir = self.get_latest_dir_path()
62
+ if latest_dir is None:
63
+ raise Exception(f"Target encoder is not available")
64
+
65
+ return os.path.join(latest_dir, self.target_encoder_dir_name, TARGET_ENCODER_OBJECT_FILE_NAME)
66
+
67
+ except Exception as e:
68
+ raise CropException(e, sys)
69
+
70
+
71
+ def get_latest_save_dir_path(self):
72
+ try:
73
+ latest_dir = self.get_latest_dir_path()
74
+ if latest_dir == None:
75
+ return os.path.join(self.model_registry, f"{0}")
76
+ latest_dir_num = int(os.path.basename(self.get_latest_dir_path()))
77
+ return os.path.join(self.model_registry, f"{latest_dir_num + 1}")
78
+ except Exception as e:
79
+ raise CropException(e, sys)
80
+
81
+ def get_latest_save_model_path(self):
82
+ try:
83
+ latest_dir = self.get_latest_save_dir_path()
84
+ return os.path.join(latest_dir, self.model_dir_name, MODEL_FILE_NAME)
85
+ except Exception as e:
86
+ raise CropException(e, sys)
87
+
88
+ def get_latest_save_transformer_path(self):
89
+ try:
90
+ latest_dir = self.get_latest_save_dir_path()
91
+ return os.path.join(latest_dir, self.transformer_dir_name, TRANSFORMER_OBJECT_FILE_NAME)
92
+ except Exception as e:
93
+ raise CropException(e, sys)
94
+
95
+ def get_latest_save_target_encoder_path(self):
96
+ try:
97
+ latest_dir = self.get_latest_save_dir_path()
98
+ return os.path.join(latest_dir, self.target_encoder_dir_name, TARGET_ENCODER_OBJECT_FILE_NAME)
99
+ except Exception as e:
100
+ raise CropException(e, sys)
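Note: ModelResolver only returns paths; inference has to load the three pickled objects itself. A hedged end-to-end sketch follows: the feature values are invented, and the column order is assumed to match what the transformer was fitted on via feature_names_in_ (the same attribute model_evaluation.py relies on).

```python
import pandas as pd
from src.predictor import ModelResolver
from src.utils import load_object

resolver = ModelResolver(model_registry="saved_models")
transformer = load_object(resolver.get_latest_transformer_path())
model = load_object(resolver.get_latest_model_path())
target_encoder = load_object(resolver.get_latest_target_encoder_path())

# Single illustrative record; column names are taken from the fitted transformer.
record = pd.DataFrame([[90, 42, 43, 20.8, 82.0, 6.5, 202.9]],
                      columns=list(transformer.feature_names_in_))

input_arr = transformer.transform(record)
pred = model.predict(input_arr)
crop = target_encoder.inverse_transform(pred.astype(int))
print(crop[0])  # e.g. "rice"
```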
crop-recommendation/src/utils.py ADDED
@@ -0,0 +1,106 @@
1
+ import pandas as pd
2
+ from src.logger import logging
3
+ from src.exception import CropException
4
+ from src.config import mongo_client
5
+ import os
6
+ import sys
7
+ import numpy as np
8
+ import yaml
9
+ import dill
10
+
11
+
12
+ def get_collection_as_dataframe(
13
+ database_name: str, collection_name: str
14
+ ) -> pd.DataFrame:
15
+ """
16
+ Description: This function returns a MongoDB collection as a pandas dataframe
17
+ =========================================================
18
+ Params:
19
+ database_name: database name
20
+ collection_name: collection name
21
+ =========================================================
22
+ return Pandas dataframe of a collection
23
+ """
24
+ try:
25
+ logging.info(
26
+ f"Reading data from database: {database_name} and collection: {collection_name}"
27
+ )
28
+ df = pd.DataFrame(list(mongo_client[database_name][collection_name].find()))
29
+ logging.info(f"{database_name} found in the mongodb")
30
+
31
+ if "_id" in df.columns:
32
+ logging.info("Dropping column: '_id'")
33
+ df = df.drop(columns=["_id"], axis=1)
34
+ logging.info(f"Row and columns in df: {df.shape}")
35
+ return df
36
+ except Exception as e:
37
+ raise CropException(e, sys)
38
+
39
+
40
+ def seperate_dependant_column(df: pd.DataFrame, exclude_column: list) -> pd.DataFrame:
41
+ final_dataframe = df.drop(exclude_column, axis=1)
42
+
43
+ return final_dataframe
44
+
45
+
46
+ def write_yaml_file(file_path, data: dict):
47
+ try:
48
+ file_dir = os.path.dirname(file_path)
49
+ os.makedirs(file_dir, exist_ok=True)
50
+
51
+ with open(file_path, "w") as file_writer:
52
+ yaml.dump(data, file_writer)
53
+ except Exception as e:
54
+ raise CropException(e, sys)
55
+
56
+
57
+ def save_object(file_path: str, obj: object) -> None:
58
+ try:
59
+ logging.info("Entered the save object method of utils")
60
+ os.makedirs(os.path.dirname(file_path), exist_ok=True)
61
+ with open(file_path, "wb") as file_obj:
62
+ dill.dump(obj, file_obj)
63
+ logging.info("Exited the save object method of utils")
64
+ except Exception as e:
65
+ raise CropException(e, sys)
66
+
67
+
68
+ def load_object(file_path: str) -> object:
69
+ try:
70
+ if not os.path.exists(file_path):
71
+ raise Exception(f"The file: {file_path} does not exist")
72
+ with open(file_path, "rb") as file_obj:
73
+ return dill.load(file_obj)
74
+ except Exception as e:
75
+ raise CropException(e, sys)
76
+
77
+
78
+ def save_numpy_array_data(file_path: str, array: np.array):
79
+ """
80
+ save numpy array data to file
81
+ file_path : str location of the file to save
82
+ array: np.array data to save
83
+ """
84
+ try:
85
+ dir_path = os.path.dirname(file_path)
86
+ os.makedirs(dir_path, exist_ok=True)
87
+
88
+ with open(file_path, "wb") as file_obj:
89
+ np.save(file_obj, array)
90
+
91
+ except Exception as e:
92
+ raise CropException(e, sys)
93
+
94
+
95
+ def load_numpy_array_data(file_path: str) -> np.array:
96
+ """
97
+ load numpy array data from file
98
+ file_path: str location of file to load
99
+ return: np.array data loaded
100
+ """
101
+ try:
102
+ with open(file_path, "rb") as file_obj:
103
+ return np.load(file_obj, allow_pickle=True)
104
+
105
+ except Exception as e:
106
+ raise CropException(e, sys)
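Note: for completeness, a round-trip sketch of the persistence helpers defined above; the paths are throwaway temp locations, not the pipeline's real artifact directories.

```python
import os
import tempfile
import numpy as np
from src.utils import save_numpy_array_data, load_numpy_array_data, save_object, load_object

tmp_dir = tempfile.mkdtemp()

# numpy array round trip
arr_path = os.path.join(tmp_dir, "demo", "train.npz")
save_numpy_array_data(file_path=arr_path, array=np.arange(6).reshape(2, 3))
print(load_numpy_array_data(file_path=arr_path))

# arbitrary Python object round trip (dill-backed)
obj_path = os.path.join(tmp_dir, "demo", "params.pkl")
save_object(file_path=obj_path, obj={"expected_score": 0.9})
print(load_object(file_path=obj_path))
```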