Upload 31 files
Browse files- .gitattributes +1 -0
- crop-recommendation +0 -1
- crop-recommendation/.gitignore +168 -0
- crop-recommendation/.vscode/extensions.json +10 -0
- crop-recommendation/.vscode/settings.json +8 -0
- crop-recommendation/.vscode/tasks.json +15 -0
- crop-recommendation/LICENSE +21 -0
- crop-recommendation/README.md +50 -0
- crop-recommendation/data_download.py +40 -0
- crop-recommendation/main.py +8 -0
- crop-recommendation/notebooks/crop-recommendation-notebook.ipynb +0 -0
- crop-recommendation/requirements.txt +11 -0
- crop-recommendation/saved_models/0/model/model.pkl +3 -0
- crop-recommendation/saved_models/0/target_encoder/target_encoder.pkl +0 -0
- crop-recommendation/saved_models/0/transformer/transformer.pkl +0 -0
- crop-recommendation/src/__init__.py +0 -0
- crop-recommendation/src/components/__init__.py +0 -0
- crop-recommendation/src/components/data_ingestion.py +73 -0
- crop-recommendation/src/components/data_trasformation.py +113 -0
- crop-recommendation/src/components/data_validation.py +159 -0
- crop-recommendation/src/components/model_evaluation.py +123 -0
- crop-recommendation/src/components/model_pusher.py +69 -0
- crop-recommendation/src/components/model_trainer.py +107 -0
- crop-recommendation/src/config.py +20 -0
- crop-recommendation/src/entity/__init__.py +0 -0
- crop-recommendation/src/entity/artifact_entity.py +40 -0
- crop-recommendation/src/entity/config_entity.py +120 -0
- crop-recommendation/src/exception.py +21 -0
- crop-recommendation/src/logger.py +22 -0
- crop-recommendation/src/pipeline/__init__.py +0 -0
- crop-recommendation/src/pipeline/training_pipeline.py +95 -0
- crop-recommendation/src/predictor.py +100 -0
- crop-recommendation/src/utils.py +106 -0
.gitattributes
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
crop-recommendation/saved_models/0/model/model.pkl filter=lfs diff=lfs merge=lfs -text
|
crop-recommendation
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
Subproject commit ce875580acbce4044f62e37db955614203e1232b
|
|
|
|
crop-recommendation/.gitignore
ADDED
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# poetry
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
102 |
+
#poetry.lock
|
103 |
+
|
104 |
+
# pdm
|
105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
106 |
+
#pdm.lock
|
107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
108 |
+
# in version control.
|
109 |
+
# https://pdm.fming.dev/#use-with-ide
|
110 |
+
.pdm.toml
|
111 |
+
|
112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
113 |
+
__pypackages__/
|
114 |
+
|
115 |
+
# Celery stuff
|
116 |
+
celerybeat-schedule
|
117 |
+
celerybeat.pid
|
118 |
+
|
119 |
+
# SageMath parsed files
|
120 |
+
*.sage.py
|
121 |
+
|
122 |
+
# Environments
|
123 |
+
.env
|
124 |
+
.venv
|
125 |
+
env/
|
126 |
+
venv/
|
127 |
+
ENV/
|
128 |
+
env.bak/
|
129 |
+
venv.bak/
|
130 |
+
|
131 |
+
# Spyder project settings
|
132 |
+
.spyderproject
|
133 |
+
.spyproject
|
134 |
+
|
135 |
+
# Rope project settings
|
136 |
+
.ropeproject
|
137 |
+
|
138 |
+
# mkdocs documentation
|
139 |
+
/site
|
140 |
+
|
141 |
+
# mypy
|
142 |
+
.mypy_cache/
|
143 |
+
.dmypy.json
|
144 |
+
dmypy.json
|
145 |
+
|
146 |
+
# Pyre type checker
|
147 |
+
.pyre/
|
148 |
+
|
149 |
+
# pytype static type analyzer
|
150 |
+
.pytype/
|
151 |
+
|
152 |
+
# Cython debug symbols
|
153 |
+
cython_debug/
|
154 |
+
|
155 |
+
# PyCharm
|
156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
157 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
+
#.idea/
|
161 |
+
|
162 |
+
data_dump.py
|
163 |
+
demo.ipynb
|
164 |
+
kaggle.json
|
165 |
+
crop-recommendation-dataset
|
166 |
+
catboost_info
|
167 |
+
temp.py
|
168 |
+
artifact
|
crop-recommendation/.vscode/extensions.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"recommendations": [
|
3 |
+
"mongodb.mongodb-vscode",
|
4 |
+
"ms-python.python",
|
5 |
+
"ms-toolsai.jupyter",
|
6 |
+
"ms-toolsai.jupyter-keymap",
|
7 |
+
"ms-toolsai.jupyter-renderers",
|
8 |
+
"formulahendry.code-runner"
|
9 |
+
]
|
10 |
+
}
|
crop-recommendation/.vscode/settings.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"workbench.colorTheme": "Default Dark+",
|
3 |
+
"workbench.preferredDarkColorTheme": "Default Dark+",
|
4 |
+
"task.allowAutomaticTasks": "on",
|
5 |
+
"workbench.editorAssociations": {
|
6 |
+
"*.md": "vscode.markdown.preview.editor"
|
7 |
+
}
|
8 |
+
}
|
crop-recommendation/.vscode/tasks.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"version": "2.0.0",
|
3 |
+
"tasks": [
|
4 |
+
{
|
5 |
+
"label": "Installing extensions and dependencies...",
|
6 |
+
"type": "shell",
|
7 |
+
"command": "code-server --install-extension mongodb.mongodb-vscode --install-extension ms-python.python --install-extension formulahendry.code-runner && pip install -r requirements.txt",
|
8 |
+
"presentation": {
|
9 |
+
"reveal": "always",
|
10 |
+
"panel": "new"
|
11 |
+
},
|
12 |
+
"runOptions": { "runOn": "folderOpen" }
|
13 |
+
}
|
14 |
+
]
|
15 |
+
}
|
crop-recommendation/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 Sadashiv Nandanikar
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
crop-recommendation/README.md
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Crop Recommendation
|
2 |
+
|
3 |
+
#### Harnessing the capabilities of machine learning models, analyzes specific parameters to suggest the most suitable crops, optimizing yields and efficiency.
|
4 |
+
|
5 |
+
## Demo
|
6 |
+
### Input Interface
|
7 |
+
<img src="https://github.com/07Sada/crop-recommendation/assets/112761379/3f8c5f4d-1df4-4516-b428-f4b95a2cc5df" alt="Image 1" width="800">
|
8 |
+
|
9 |
+
### Output Interface
|
10 |
+
<img src="https://github.com/07Sada/crop-recommendation/assets/112761379/86a4aefd-b973-40ad-b79c-f2b1dd070d91" alt="Image 1" width="800">
|
11 |
+
|
12 |
+
## Data Source
|
13 |
+
This dataset contains information about the soil and environmental conditions that are ideal for growing different crops. The dataset includes the following columns:
|
14 |
+
|
15 |
+
- `N`: The ratio of nitrogen content in the soil.
|
16 |
+
- `P`: The ratio of phosphorus content in the soil.
|
17 |
+
- `K`: The ratio of potassium content in the soil.
|
18 |
+
- `Temperature`: The temperature in degrees Celsius.
|
19 |
+
- `Humidity`: The relative humidity in percent.
|
20 |
+
- `pH`: The pH value of the soil.
|
21 |
+
- `Rainfall`: The rainfall in millimeters.
|
22 |
+
|
23 |
+
[Link](https://www.kaggle.com/datasets/atharvaingle/crop-recommendation-dataset) for the dataset
|
24 |
+
|
25 |
+
<details>
|
26 |
+
<summary>Supported crops
|
27 |
+
</summary>
|
28 |
+
|
29 |
+
- Apple
|
30 |
+
- Blueberry
|
31 |
+
- Cherry
|
32 |
+
- Corn
|
33 |
+
- Grape
|
34 |
+
- Pepper
|
35 |
+
- Orange
|
36 |
+
- Peach
|
37 |
+
- Potato
|
38 |
+
- Soybean
|
39 |
+
- Strawberry
|
40 |
+
- Tomato
|
41 |
+
- Squash
|
42 |
+
- Raspberry
|
43 |
+
</details>
|
44 |
+
|
45 |
+
## Project Details
|
46 |
+
This is repository is submodule for [CropGaurd](https://github.com/07Sada/CropGaurd.git)
|
47 |
+
|
48 |
+
## Project PipeLine Stages
|
49 |
+
![Project PipeLine Stages](https://user-images.githubusercontent.com/112761379/225940480-2a7381b2-6abd-4c1c-8287-0fd49099be8c.jpg)
|
50 |
+
|
crop-recommendation/data_download.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import opendatasets as od
|
2 |
+
import os
|
3 |
+
import json
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
|
6 |
+
# Load variables from .env file
|
7 |
+
load_dotenv()
|
8 |
+
|
9 |
+
DATASET_URL = "https://www.kaggle.com/datasets/atharvaingle/crop-recommendation-dataset"
|
10 |
+
|
11 |
+
def create_kaggle_json_file():
|
12 |
+
# Fetch the username and API key from the .env file
|
13 |
+
username = os.getenv('username')
|
14 |
+
key = os.getenv('key')
|
15 |
+
|
16 |
+
kaggle_credentials = {
|
17 |
+
"username": username,
|
18 |
+
"key": key
|
19 |
+
}
|
20 |
+
|
21 |
+
# Path to the kaggle.json file
|
22 |
+
kaggle_file_path = os.path.join(os.getcwd(), 'kaggle.json')
|
23 |
+
|
24 |
+
# Write the dictionary to the .kaggle/kaggle.json file
|
25 |
+
with open(kaggle_file_path, 'w') as file:
|
26 |
+
json.dump(kaggle_credentials, file)
|
27 |
+
|
28 |
+
def remove_kaggle_json_file():
|
29 |
+
# Path to the kaggle.json file
|
30 |
+
kaggle_file_path = os.path.join(os.getcwd(), 'kaggle.json')
|
31 |
+
|
32 |
+
# Remove the kaggle.json file
|
33 |
+
os.remove(kaggle_file_path)
|
34 |
+
|
35 |
+
create_kaggle_json_file()
|
36 |
+
|
37 |
+
od.download(DATASET_URL)
|
38 |
+
|
39 |
+
# Remove the kaggle.json file after downloading the dataset
|
40 |
+
remove_kaggle_json_file()
|
crop-recommendation/main.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.pipeline.training_pipeline import start_training_pipeline
|
2 |
+
|
3 |
+
if __name__ =="__main__":
|
4 |
+
try:
|
5 |
+
start_training_pipeline()
|
6 |
+
|
7 |
+
except Exception as e:
|
8 |
+
print(e)
|
crop-recommendation/notebooks/crop-recommendation-notebook.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
crop-recommendation/requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pymongo
|
2 |
+
pandas
|
3 |
+
numpy
|
4 |
+
matplotlib
|
5 |
+
seaborn
|
6 |
+
scikit-learn
|
7 |
+
opendatasets
|
8 |
+
python-dotenv
|
9 |
+
ipykernel
|
10 |
+
PyYAML
|
11 |
+
dill
|
crop-recommendation/saved_models/0/model/model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61347ed5e6bbb2060eddc5a515c43e9d61aae5f6f1c7eaecb1f52b64f2df89a5
|
3 |
+
size 3676666
|
crop-recommendation/saved_models/0/target_encoder/target_encoder.pkl
ADDED
Binary file (499 Bytes). View file
|
|
crop-recommendation/saved_models/0/transformer/transformer.pkl
ADDED
Binary file (901 Bytes). View file
|
|
crop-recommendation/src/__init__.py
ADDED
File without changes
|
crop-recommendation/src/components/__init__.py
ADDED
File without changes
|
crop-recommendation/src/components/data_ingestion.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.entity import config_entity
|
2 |
+
from src.entity import artifact_entity
|
3 |
+
from src.exception import CropException
|
4 |
+
from src.logger import logging
|
5 |
+
from src import utils
|
6 |
+
|
7 |
+
from sklearn.model_selection import train_test_split
|
8 |
+
import numpy as np
|
9 |
+
import pandas as pd
|
10 |
+
import sys
|
11 |
+
import os
|
12 |
+
|
13 |
+
|
14 |
+
class DataIngestion:
|
15 |
+
def __init__(self, data_ingestion_config: config_entity.DataIngestionConfig):
|
16 |
+
try:
|
17 |
+
logging.info(f"{'>>'*20} Data Ingestion {'<<'*20}")
|
18 |
+
self.data_ingestion_config = data_ingestion_config
|
19 |
+
except Exception as e:
|
20 |
+
raise CropException(e, sys)
|
21 |
+
|
22 |
+
def initiate_data_ingestion(self) -> artifact_entity.DataIngestionArtifact:
|
23 |
+
try:
|
24 |
+
logging.info("Exporting collection data as pandas dataframe")
|
25 |
+
|
26 |
+
df: pd.DataFrame = utils.get_collection_as_dataframe(
|
27 |
+
database_name=self.data_ingestion_config.database_name,
|
28 |
+
collection_name=self.data_ingestion_config.collection_name,
|
29 |
+
)
|
30 |
+
|
31 |
+
logging.info("Saving data in feature store")
|
32 |
+
|
33 |
+
feature_store_dir = os.path.dirname(self.data_ingestion_config.feature_store_file_path)
|
34 |
+
os.makedirs(feature_store_dir, exist_ok=True)
|
35 |
+
|
36 |
+
logging.info("Saving dataframe into feature store")
|
37 |
+
df.to_csv(
|
38 |
+
path_or_buf=self.data_ingestion_config.feature_store_file_path,
|
39 |
+
index=False,
|
40 |
+
header=True,
|
41 |
+
)
|
42 |
+
|
43 |
+
logging.info("split dataset into train and test test")
|
44 |
+
train_df, test_df = train_test_split(
|
45 |
+
df, test_size=self.data_ingestion_config.test_size, random_state=42
|
46 |
+
)
|
47 |
+
|
48 |
+
logging.info("create dataset directory folder if not available")
|
49 |
+
dataset_dir = os.path.dirname(self.data_ingestion_config.train_file_path)
|
50 |
+
os.makedirs(dataset_dir, exist_ok=True)
|
51 |
+
|
52 |
+
logging.info("Save df to feature store folder")
|
53 |
+
train_df.to_csv(
|
54 |
+
path_or_buf=self.data_ingestion_config.train_file_path,
|
55 |
+
index=False,
|
56 |
+
header=True,
|
57 |
+
)
|
58 |
+
test_df.to_csv(
|
59 |
+
path_or_buf=self.data_ingestion_config.test_file_path,
|
60 |
+
index=False,
|
61 |
+
header=True,
|
62 |
+
)
|
63 |
+
|
64 |
+
data_ingestion_artifact = artifact_entity.DataIngestionArtifact(
|
65 |
+
feature_store_file_path=self.data_ingestion_config.feature_store_file_path,
|
66 |
+
train_file_path=self.data_ingestion_config.train_file_path,
|
67 |
+
test_file_path=self.data_ingestion_config.test_file_path,
|
68 |
+
)
|
69 |
+
logging.info(f"Data ingestion artifact: {data_ingestion_artifact}")
|
70 |
+
return data_ingestion_artifact
|
71 |
+
|
72 |
+
except Exception as e:
|
73 |
+
raise CropException(error_message=e, error_detail=sys)
|
crop-recommendation/src/components/data_trasformation.py
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.entity import artifact_entity
|
2 |
+
from src.entity import config_entity
|
3 |
+
from src.logger import logging
|
4 |
+
from src.exception import CropException
|
5 |
+
from src import utils
|
6 |
+
from src.config import TARGET_COLUMN
|
7 |
+
|
8 |
+
from typing import Optional
|
9 |
+
import os
|
10 |
+
import sys
|
11 |
+
|
12 |
+
from sklearn.pipeline import Pipeline
|
13 |
+
from sklearn.preprocessing import LabelEncoder
|
14 |
+
from sklearn.preprocessing import StandardScaler
|
15 |
+
import pandas as pd
|
16 |
+
import numpy as np
|
17 |
+
|
18 |
+
|
19 |
+
class DataTransformation:
|
20 |
+
def __init__(
|
21 |
+
self,
|
22 |
+
data_transformation_config: config_entity.DataTransformationConfig,
|
23 |
+
data_ingestion_artifact: artifact_entity.DataIngestionArtifact,
|
24 |
+
):
|
25 |
+
try:
|
26 |
+
logging.info(f"{'>'*20} Data Transformation Initiated {'<'*20}")
|
27 |
+
self.data_transformation_config = data_transformation_config
|
28 |
+
self.data_ingestion_artifact = data_ingestion_artifact
|
29 |
+
|
30 |
+
except Exception as e:
|
31 |
+
raise CropException(e, sys)
|
32 |
+
|
33 |
+
@classmethod
|
34 |
+
def get_data_tranformer_object(cls) -> Pipeline:
|
35 |
+
try:
|
36 |
+
standard_scaler = StandardScaler()
|
37 |
+
|
38 |
+
pipeline = Pipeline(steps=[("StandardScaler", standard_scaler)])
|
39 |
+
|
40 |
+
return pipeline
|
41 |
+
|
42 |
+
except Exception as e:
|
43 |
+
raise CropException(e, sys)
|
44 |
+
|
45 |
+
def initiate_data_transformation(
|
46 |
+
self,
|
47 |
+
) -> artifact_entity.DataTransformationArtifact:
|
48 |
+
try:
|
49 |
+
# reading training and testing file
|
50 |
+
train_df = pd.read_csv(self.data_ingestion_artifact.train_file_path)
|
51 |
+
test_df = pd.read_csv(self.data_ingestion_artifact.test_file_path)
|
52 |
+
|
53 |
+
# selecting input features for train and test dataframe
|
54 |
+
input_feature_train_df = train_df.drop(TARGET_COLUMN, axis=1)
|
55 |
+
input_feature_test_df = test_df.drop(TARGET_COLUMN, axis=1)
|
56 |
+
|
57 |
+
# selecting target feature for train and test dataframe
|
58 |
+
target_feature_train_df = train_df[TARGET_COLUMN]
|
59 |
+
target_feature_test_df = test_df[TARGET_COLUMN]
|
60 |
+
|
61 |
+
label_encoder = LabelEncoder()
|
62 |
+
label_encoder.fit(target_feature_train_df)
|
63 |
+
|
64 |
+
# transformation on target column
|
65 |
+
target_feature_train_arr = label_encoder.transform(target_feature_train_df)
|
66 |
+
target_feature_test_arr = label_encoder.transform(target_feature_test_df)
|
67 |
+
|
68 |
+
# transforming input features
|
69 |
+
transformation_pipeline = DataTransformation.get_data_tranformer_object()
|
70 |
+
transformation_pipeline.fit(input_feature_train_df)
|
71 |
+
|
72 |
+
input_feature_train_arr = transformation_pipeline.transform(
|
73 |
+
input_feature_train_df
|
74 |
+
)
|
75 |
+
input_feature_test_arr = transformation_pipeline.transform(
|
76 |
+
input_feature_test_df
|
77 |
+
)
|
78 |
+
|
79 |
+
train_arr = np.c_[input_feature_train_arr, target_feature_train_arr]
|
80 |
+
test_arr = np.c_[input_feature_test_arr, target_feature_test_arr]
|
81 |
+
|
82 |
+
# save the numpy array
|
83 |
+
utils.save_object(
|
84 |
+
file_path=self.data_transformation_config.transformed_train_path,
|
85 |
+
obj=train_arr,
|
86 |
+
)
|
87 |
+
utils.save_object(
|
88 |
+
file_path=self.data_transformation_config.transformed_test_path,
|
89 |
+
obj=test_arr,
|
90 |
+
)
|
91 |
+
|
92 |
+
utils.save_object(
|
93 |
+
file_path=self.data_transformation_config.transform_object_path,
|
94 |
+
obj=transformation_pipeline,
|
95 |
+
)
|
96 |
+
|
97 |
+
utils.save_object(
|
98 |
+
file_path=self.data_transformation_config.target_encoder_path,
|
99 |
+
obj=label_encoder,
|
100 |
+
)
|
101 |
+
|
102 |
+
data_transformation_artifact = artifact_entity.DataTransformationArtifact(
|
103 |
+
transform_object_path=self.data_transformation_config.transform_object_path,
|
104 |
+
transformed_train_path=self.data_transformation_config.transformed_train_path,
|
105 |
+
transformed_test_path=self.data_transformation_config.transformed_test_path,
|
106 |
+
target_encoder_path=self.data_transformation_config.target_encoder_path,
|
107 |
+
)
|
108 |
+
|
109 |
+
logging.info(f"Data transformation object : {data_transformation_artifact}")
|
110 |
+
return data_transformation_artifact
|
111 |
+
|
112 |
+
except Exception as e:
|
113 |
+
raise CropException(e, sys)
|
crop-recommendation/src/components/data_validation.py
ADDED
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.entity import artifact_entity
|
2 |
+
from src.entity import config_entity
|
3 |
+
from src.logger import logging
|
4 |
+
from src.exception import CropException
|
5 |
+
from src.config import TARGET_COLUMN
|
6 |
+
from src import utils
|
7 |
+
|
8 |
+
from typing import Optional
|
9 |
+
from scipy.stats import ks_2samp
|
10 |
+
import pandas as pd
|
11 |
+
import numpy as np
|
12 |
+
import sys
|
13 |
+
import os
|
14 |
+
|
15 |
+
|
16 |
+
class DataValidation:
|
17 |
+
def __init__(
|
18 |
+
self,
|
19 |
+
data_validation_config: config_entity.DataValidationConfig,
|
20 |
+
data_ingestion_artifact: artifact_entity.DataIngestionArtifact,
|
21 |
+
):
|
22 |
+
try:
|
23 |
+
logging.info(f"{'>'*20} Data Validation iniated {'<'*20}")
|
24 |
+
self.data_validation_config = data_validation_config
|
25 |
+
self.data_ingestion_artifact = data_ingestion_artifact
|
26 |
+
self.validation_error = dict()
|
27 |
+
except Exception as e:
|
28 |
+
raise CropException(e, sys)
|
29 |
+
|
30 |
+
def is_required_columns_exists(
|
31 |
+
self, base_df: pd.DataFrame, current_df: pd.DataFrame, report_key_name: str
|
32 |
+
) -> bool:
|
33 |
+
try:
|
34 |
+
base_columns = base_df.columns
|
35 |
+
current_columns = current_df.columns
|
36 |
+
|
37 |
+
missing_columns = []
|
38 |
+
for base_column in base_columns:
|
39 |
+
if base_column not in current_columns:
|
40 |
+
logging.info(f"Column: {base_column} is not available")
|
41 |
+
missing_columns.append(base_column)
|
42 |
+
|
43 |
+
if len(missing_columns) > 0:
|
44 |
+
self.validation_error[report_key_name] = missing_columns
|
45 |
+
return False
|
46 |
+
|
47 |
+
return True
|
48 |
+
|
49 |
+
except Exception as e:
|
50 |
+
raise CropException(e, sys)
|
51 |
+
|
52 |
+
def data_drift(
|
53 |
+
self, base_df: pd.DataFrame, current_df: pd.DataFrame, report_key_name: str
|
54 |
+
):
|
55 |
+
try:
|
56 |
+
drift_report = dict()
|
57 |
+
|
58 |
+
base_columns = base_df.columns
|
59 |
+
current_columns = current_df.columns
|
60 |
+
|
61 |
+
for base_column in base_columns:
|
62 |
+
base_data, current_data = base_df[base_column], current_df[base_column]
|
63 |
+
|
64 |
+
# Null hypothesis is that both columns data drawn from same distribution
|
65 |
+
|
66 |
+
logging.info(
|
67 |
+
f"Hypothesis {base_column} : {base_data.dtype}, {current_data.dtype}"
|
68 |
+
)
|
69 |
+
same_distribution = ks_2samp(base_data, current_data)
|
70 |
+
|
71 |
+
if same_distribution.pvalue > 0.05:
|
72 |
+
# we are accepting the null hypothesis
|
73 |
+
drift_report[base_column] = {
|
74 |
+
"pvalue": float(same_distribution.pvalue),
|
75 |
+
"same_distribution": True,
|
76 |
+
}
|
77 |
+
|
78 |
+
else:
|
79 |
+
drift_report[base_column] = {
|
80 |
+
"pvalue": float(same_distribution.pvalue),
|
81 |
+
"same_distribution": False,
|
82 |
+
}
|
83 |
+
|
84 |
+
self.validation_error[report_key_name] = drift_report
|
85 |
+
|
86 |
+
except Exception as e:
|
87 |
+
raise CropException(e, sys)
|
88 |
+
|
89 |
+
def initiate_data_validation(self) -> artifact_entity.DataValidationArtifact:
|
90 |
+
try:
|
91 |
+
logging.info(f"Reading base dataframe")
|
92 |
+
base_df = pd.read_csv(self.data_validation_config.base_file_path)
|
93 |
+
|
94 |
+
logging.info(f"Reading train dataframe")
|
95 |
+
train_df = pd.read_csv(self.data_ingestion_artifact.train_file_path)
|
96 |
+
|
97 |
+
logging.info(f"Reading test dataframe")
|
98 |
+
test_df = pd.read_csv(self.data_ingestion_artifact.test_file_path)
|
99 |
+
|
100 |
+
exclude_column = [TARGET_COLUMN]
|
101 |
+
base_df = utils.seperate_dependant_column(
|
102 |
+
df=base_df, exclude_column=exclude_column
|
103 |
+
)
|
104 |
+
train_df = utils.seperate_dependant_column(
|
105 |
+
df=train_df, exclude_column=exclude_column
|
106 |
+
)
|
107 |
+
test_df = utils.seperate_dependant_column(
|
108 |
+
df=test_df, exclude_column=exclude_column
|
109 |
+
)
|
110 |
+
|
111 |
+
logging.info(f"Is all required columns present in the train_df")
|
112 |
+
train_df_columns_status = self.is_required_columns_exists(
|
113 |
+
base_df=base_df,
|
114 |
+
current_df=train_df,
|
115 |
+
report_key_name="missing_columns_within_train_dataset",
|
116 |
+
)
|
117 |
+
|
118 |
+
test_df_columns_status = self.is_required_columns_exists(
|
119 |
+
base_df=base_df,
|
120 |
+
current_df=test_df,
|
121 |
+
report_key_name="missing_columns_within_test_dataset",
|
122 |
+
)
|
123 |
+
|
124 |
+
if train_df_columns_status:
|
125 |
+
logging.info(
|
126 |
+
f"As all column are available in train df hence detecting data drift"
|
127 |
+
)
|
128 |
+
self.data_drift(
|
129 |
+
base_df=base_df,
|
130 |
+
current_df=train_df,
|
131 |
+
report_key_name="data_drift_within_train_dataset",
|
132 |
+
)
|
133 |
+
|
134 |
+
if test_df_columns_status:
|
135 |
+
logging.info(
|
136 |
+
f"As all column are available in test df hence detecting data drift"
|
137 |
+
)
|
138 |
+
self.data_drift(
|
139 |
+
base_df=base_df,
|
140 |
+
current_df=test_df,
|
141 |
+
report_key_name="data_drift_within_test_dataset",
|
142 |
+
)
|
143 |
+
|
144 |
+
# writing the report
|
145 |
+
logging.info("Writing report in yaml format")
|
146 |
+
utils.write_yaml_file(
|
147 |
+
file_path=self.data_validation_config.report_file_path,
|
148 |
+
data=self.validation_error,
|
149 |
+
)
|
150 |
+
|
151 |
+
data_validation_artifact = artifact_entity.DataValidationArtifact(
|
152 |
+
report_file_path=self.data_validation_config.report_file_path
|
153 |
+
)
|
154 |
+
logging.info(f"Data validation artifact: {data_validation_artifact}")
|
155 |
+
|
156 |
+
return data_validation_artifact
|
157 |
+
|
158 |
+
except Exception as e:
|
159 |
+
raise CropException(e, sys)
|
crop-recommendation/src/components/model_evaluation.py
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.predictor import ModelResolver
|
2 |
+
from src.entity import config_entity
|
3 |
+
from src.entity import artifact_entity
|
4 |
+
from src.logger import logging
|
5 |
+
from src.exception import CropException
|
6 |
+
from src.config import TARGET_COLUMN
|
7 |
+
from src.utils import load_object
|
8 |
+
|
9 |
+
from sklearn.metrics import f1_score
|
10 |
+
import pandas as pd
|
11 |
+
import numpy as np
|
12 |
+
import os
|
13 |
+
import sys
|
14 |
+
|
15 |
+
|
16 |
+
class ModelEvaluation:
    """Decides whether the newly trained model should replace the model
    currently stored in the saved-model registry.

    The comparison metric is the weighted F1 score on the ingested test set.
    If no previously saved model exists, the new model is accepted outright.
    """

    def __init__(
        self,
        model_eval_config: config_entity.ModelEvaluationConfig,
        data_ingesiton_artifact: artifact_entity.DataIngestionArtifact,
        data_transformation_artifact: artifact_entity.DataTransformationArtifact,
        model_trainer_artifact: artifact_entity.ModelTrainerArtifact,
    ):
        try:
            logging.info(f"{'>'*20} Model Evaluation Initiated {'<'*20}")
            self.model_eval_config = model_eval_config
            # NOTE: the misspelled parameter name "data_ingesiton_artifact" is
            # kept because callers (training_pipeline) pass it by keyword.
            self.data_ingesiton_artifact = data_ingesiton_artifact
            self.data_transformation_artifact = data_transformation_artifact
            self.model_trainer_artifact = model_trainer_artifact
            self.model_resolver = ModelResolver()

        except Exception as e:
            raise CropException(e, sys)

    def initiate_model_evaluation(self) -> artifact_entity.ModelEvaluationArtifact:
        """Evaluate the newly trained model against the latest saved model.

        Returns:
            ModelEvaluationArtifact with ``is_model_accepted=True`` and the
            score improvement (or ``improved_accuracy=None`` when there is no
            previous model to compare against).

        Raises:
            CropException: on any failure, or when the new model does not
                beat the previously saved model.
        """
        try:
            logging.info(
                f"If the saved model directory contains a model, we will compare which model is best trained: \
                the model from the saved model folder or the new model."
            )

            latest_dir_path = self.model_resolver.get_latest_dir_path()
            if latest_dir_path is None:
                # No saved model yet: accept the freshly trained model.
                model_eval_artifact = artifact_entity.ModelEvaluationArtifact(
                    is_model_accepted=True, improved_accuracy=None
                )
                logging.info(f"Model evaluation artifact: {model_eval_artifact}")
                return model_eval_artifact

            # Locate the previously saved transformer, model and target encoder.
            logging.info(f"Finding location of transformer model and target encoder")
            transformer_path = self.model_resolver.get_latest_transformer_path()
            model_path = self.model_resolver.get_latest_model_path()
            target_encoder_path = self.model_resolver.get_latest_target_encoder_path()

            logging.info(
                f"Previous trained objects of transformer, model and target encoder"
            )
            # previously trained objects
            transformer = load_object(file_path=transformer_path)
            model = load_object(file_path=model_path)
            target_encoder = load_object(file_path=target_encoder_path)

            logging.info(f"Currently trained model objects")
            # currently trained model objects
            current_transformer = load_object(
                file_path=self.data_transformation_artifact.transform_object_path
            )
            current_model = load_object(
                file_path=self.model_trainer_artifact.model_path
            )
            current_target_encoder = load_object(
                file_path=self.data_transformation_artifact.target_encoder_path
            )

            test_df = pd.read_csv(self.data_ingesiton_artifact.test_file_path)
            target_df = test_df[TARGET_COLUMN]

            # Score of the PREVIOUS (saved) model.
            # BUG FIX: this section previously predicted with ``current_model``
            # and encoded labels with ``current_target_encoder``, so both
            # scores were computed from the new model and the comparison below
            # was a no-op. It now uses the previously saved model/encoder.
            input_feature_name = list(transformer.feature_names_in_)
            input_arr = transformer.transform(test_df[input_feature_name])

            y_pred = model.predict(input_arr)
            y_true = target_encoder.transform(target_df)

            previous_model_score = f1_score(
                y_true=y_true, y_pred=y_pred, average="weighted"
            )
            logging.info(
                f"Accuracy using previous trained model: {previous_model_score}"
            )

            # Score of the CURRENT (newly trained) model.
            input_feature_name = list(current_transformer.feature_names_in_)
            input_arr = current_transformer.transform(test_df[input_feature_name])

            y_pred = current_model.predict(input_arr)
            y_true = current_target_encoder.transform(target_df)

            current_model_score = f1_score(
                y_true=y_true, y_pred=y_pred, average="weighted"
            )

            logging.info(f"Accuracy using current trained model: {current_model_score}")

            if current_model_score <= previous_model_score:
                logging.info(f"Current trained model is not better than previous model")
                raise Exception("Current trained model is not better than previous model")

            model_eval_artifact = artifact_entity.ModelEvaluationArtifact(
                is_model_accepted=True,
                improved_accuracy=current_model_score - previous_model_score,
            )
            logging.info(f"Model Eval artifacts: {model_eval_artifact}")

            return model_eval_artifact

        except Exception as e:
            raise CropException(e, sys)
|
crop-recommendation/src/components/model_pusher.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.entity.config_entity import ModelPusherConfig
|
2 |
+
from src.entity import artifact_entity
|
3 |
+
from src.predictor import ModelResolver
|
4 |
+
from src.exception import CropException
|
5 |
+
from src.logger import logging
|
6 |
+
from src.utils import load_object, save_object
|
7 |
+
from src.entity.artifact_entity import (
|
8 |
+
DataTransformationArtifact,
|
9 |
+
ModelTrainerArtifact,
|
10 |
+
ModelPusherArtifact,
|
11 |
+
)
|
12 |
+
import sys
|
13 |
+
import os
|
14 |
+
|
15 |
+
|
16 |
+
class ModelPusher:
    """Publishes the accepted transformer, model and target encoder.

    Two copies are written: one into the run-local model-pusher artifact
    directory, and one into the next version folder of the shared
    saved-model registry.
    """

    def __init__(
        self,
        model_pusher_config: ModelPusherConfig,
        data_transformation_artifact: DataTransformationArtifact,
        model_trainer_artifact: ModelTrainerArtifact,
    ):
        try:
            logging.info(f"{'>'*20} Model Pusher Initiated {'<'*30}")
            self.model_pusher_config = model_pusher_config
            self.data_transformation_artifact = data_transformation_artifact
            self.model_trainer_artifact = model_trainer_artifact
            self.model_resolver = ModelResolver(
                model_registry=self.model_pusher_config.saved_model_dir
            )
        except Exception as e:
            raise CropException(e, sys)

    def initiate_model_pusher(self) -> ModelPusherArtifact:
        """Copy the trained objects to the pusher dir and the registry."""
        try:
            # Load the objects produced by the earlier pipeline stages.
            logging.info(f"Loading transformer model and target encoder")
            transformation = self.data_transformation_artifact
            transformer_obj = load_object(file_path=transformation.transform_object_path)
            model_obj = load_object(file_path=self.model_trainer_artifact.model_path)
            encoder_obj = load_object(file_path=transformation.target_encoder_path)

            # First copy: the run-local model-pusher artifact directory.
            logging.info(f"Saving model into model pusher directory")
            pusher_cfg = self.model_pusher_config
            save_object(file_path=pusher_cfg.pusher_transformer_path, obj=transformer_obj)
            save_object(file_path=pusher_cfg.pusher_model_path, obj=model_obj)
            save_object(file_path=pusher_cfg.pusher_target_encoder_path, obj=encoder_obj)

            # Second copy: the next version folder of the saved-model registry.
            logging.info(f"Saving model in saved model dir")

            registry_destinations = (
                (self.model_resolver.get_latest_save_transformer_path(), transformer_obj),
                (self.model_resolver.get_latest_save_model_path(), model_obj),
                (self.model_resolver.get_latest_save_target_encoder_path(), encoder_obj),
            )
            for destination, payload in registry_destinations:
                save_object(file_path=destination, obj=payload)

            model_pusher_artifact = ModelPusherArtifact(
                pusher_model_dir=pusher_cfg.pusher_model_dir,
                saved_model_dir=pusher_cfg.saved_model_dir,
            )
            logging.info(f"Model Pusher artifact: {model_pusher_artifact}")

            return model_pusher_artifact

        except Exception as e:
            raise CropException(e, sys)
|
crop-recommendation/src/components/model_trainer.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.entity import config_entity
|
2 |
+
from src.entity import artifact_entity
|
3 |
+
from src.logger import logging
|
4 |
+
from src.exception import CropException
|
5 |
+
from src import utils
|
6 |
+
|
7 |
+
from typing import Optional
|
8 |
+
from sklearn.metrics import f1_score
|
9 |
+
from sklearn.ensemble import RandomForestClassifier
|
10 |
+
import os
|
11 |
+
import sys
|
12 |
+
|
13 |
+
|
14 |
+
class ModelTrainer:
    """Trains a RandomForest classifier on the transformed arrays and
    validates it against the configured underfitting/overfitting thresholds
    before persisting it."""

    def __init__(
        self,
        model_trainer_config: config_entity.ModelTrainerConfig,
        data_transformation_artifact: artifact_entity.DataTransformationArtifact,
    ):
        try:
            logging.info(f"{'>'*30} Model Trainer Initiated {'<'*30}")
            self.model_trainer_config = model_trainer_config
            self.data_transformation_artifact = data_transformation_artifact

        except Exception as e:
            raise CropException(e, sys)

    def train_model(self, X, y):
        """Fit and return a RandomForestClassifier on (X, y)."""
        try:
            random_forest = RandomForestClassifier()
            random_forest.fit(X, y)

            return random_forest

        except Exception as e:
            raise CropException(e, sys)

    def initiate_model_trainer(self) -> artifact_entity.ModelTrainerArtifact:
        """Train, validate and persist the model.

        Returns:
            ModelTrainerArtifact with the saved model path and F1 scores.

        Raises:
            CropException: on failure, when the test F1 score is below the
                expected score, or when the train/test gap exceeds the
                overfitting threshold.
        """
        try:
            logging.info(f"Loading train and test array")
            train_arr = utils.load_numpy_array_data(
                file_path=self.data_transformation_artifact.transformed_train_path
            )
            test_arr = utils.load_numpy_array_data(
                file_path=self.data_transformation_artifact.transformed_test_path
            )

            logging.info(
                f"Splitting input and target feature from both train and test arr. "
            )
            # The last column of each array holds the encoded target label.
            X_train, y_train = train_arr[:, :-1], train_arr[:, -1]
            X_test, y_test = test_arr[:, :-1], test_arr[:, -1]

            logging.info(f"Training the model")
            model = self.train_model(X=X_train, y=y_train)

            # Fix: corrected log-message typo "scrore" -> "score".
            logging.info(f"Calculating f1 train score")
            yhat_train = model.predict(X_train)
            f1_train_score = f1_score(
                y_true=y_train, y_pred=yhat_train, average="weighted"
            )

            logging.info(f"Calculating f1 test score")
            yhat_test = model.predict(X_test)
            f1_test_score = f1_score(
                y_true=y_test, y_pred=yhat_test, average="weighted"
            )

            logging.info(
                f"train_score: {f1_train_score} and test score: {f1_test_score}"
            )

            # checking for overfitting or underfitting or expected score
            # Fix: corrected log-message typo "out model" -> "our model".
            logging.info(f"Checking if our model is underfitting or not")
            if f1_test_score < self.model_trainer_config.expected_score:
                raise Exception(
                    f"Model is not good as it is not able to give \
                    expected accuracy: {self.model_trainer_config.expected_score}, model actual score: {f1_test_score}"
                )

            logging.info(f"Checking if our model is overfitting or not")
            diff = abs(f1_train_score - f1_test_score)

            if diff > self.model_trainer_config.overfitting_threshold:
                raise Exception(
                    f"Train and test score diff: {diff} \
                    is more than overfitting threshold: {self.model_trainer_config.overfitting_threshold}"
                )

            # save the trained model
            logging.info(f"Saving model object")
            utils.save_object(file_path=self.model_trainer_config.model_path, obj=model)

            # prepare artifact
            logging.info(f"Prepare the artifact")
            # NOTE(review): "f2_test_score" mirrors the (likely misspelled)
            # field name on ModelTrainerArtifact; kept for compatibility.
            model_trainer_artifact = artifact_entity.ModelTrainerArtifact(
                model_path=self.model_trainer_config.model_path,
                f1_train_score=f1_train_score,
                f2_test_score=f1_test_score,
            )

            logging.info(f"Model trainer artifact: {model_trainer_artifact}")

            return model_trainer_artifact

        except Exception as e:
            raise CropException(e, sys)
|
crop-recommendation/src/config.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pymongo
import pandas as pd
import json
from dataclasses import dataclass
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so that
# os.getenv below can see MONGO_URL during local development.
load_dotenv()


@dataclass
class EnvironmentVariable:
    # MongoDB connection string read from the MONGO_URL environment variable.
    # NOTE(review): this is a class-level attribute evaluated once at import
    # time (no annotation), not a per-instance dataclass field.
    mongo_db_url = os.getenv("MONGO_URL")


# Single shared instance holding the resolved environment values.
env = EnvironmentVariable()

# Module-level MongoDB client shared across the project (imported by
# src.utils for all database reads).
mongo_client = pymongo.MongoClient(env.mongo_db_url)

# Name of the target/label column in the crop dataset.
TARGET_COLUMN = "label"
|
crop-recommendation/src/entity/__init__.py
ADDED
File without changes
|
crop-recommendation/src/entity/artifact_entity.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass


@dataclass
class DataIngestionArtifact:
    """Output paths produced by the data-ingestion stage."""

    feature_store_file_path: str  # full raw dataset dump (crop.csv)
    train_file_path: str  # train split CSV
    test_file_path: str  # test split CSV


@dataclass
class DataValidationArtifact:
    """Output of the data-validation stage."""

    report_file_path: str  # YAML validation report


@dataclass
class DataTransformationArtifact:
    """Output paths produced by the data-transformation stage."""

    transform_object_path: str  # pickled input-feature transformer
    transformed_train_path: str  # transformed train array (.npz)
    transformed_test_path: str  # transformed test array (.npz)
    target_encoder_path: str  # pickled label encoder for the target column


@dataclass
class ModelTrainerArtifact:
    """Output of the model-training stage."""

    model_path: str  # pickled trained model
    f1_train_score: float  # weighted F1 on the train split
    # NOTE(review): likely a typo for "f1_test_score" — kept unchanged
    # because ModelTrainer constructs this artifact with this keyword.
    f2_test_score: float


@dataclass
class ModelEvaluationArtifact:
    """Output of the model-evaluation stage."""

    is_model_accepted: bool  # True when the new model should be pushed
    improved_accuracy: float  # score delta vs previous model (None if none)


@dataclass
class ModelPusherArtifact:
    """Output of the model-pusher stage."""

    pusher_model_dir: str  # run-local copy of the pushed objects
    saved_model_dir: str  # shared versioned model registry root
|
crop-recommendation/src/entity/config_entity.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import sys
from src.exception import CropException
from src.logger import logging
from datetime import datetime

# Canonical file names shared by every pipeline stage.
FILE_NAME = "crop.csv"
TRAIN_FILE_NAME = "train.csv"
TEST_FILE_NAME = "test.csv"
TRANSFORMER_OBJECT_FILE_NAME = "transformer.pkl"
TARGET_ENCODER_OBJECT_FILE_NAME = "target_encoder.pkl"
MODEL_FILE_NAME = "model.pkl"


class TrainingPipelineConfig:
    """Root configuration: one timestamped artifact directory per run."""

    def __init__(self):
        try:
            # e.g. <cwd>/artifact/06152024__120000
            self.artifact_dir = os.path.join(
                os.getcwd(), "artifact", f"{datetime.now().strftime('%m%d%Y__%H%M%S')}"
            )
        except Exception as e:
            raise CropException(e, sys)


class DataIngestionConfig:
    """Settings for pulling the raw dataset from MongoDB and splitting it."""

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        try:
            self.database_name = "smartcropguard"
            self.collection_name = "crop"
            self.data_ingestion_dir = os.path.join(
                training_pipeline_config.artifact_dir, "data_ingestion"
            )
            # Raw dump of the whole collection.
            self.feature_store_file_path = os.path.join(
                self.data_ingestion_dir, "feature_store", FILE_NAME
            )
            self.train_file_path = os.path.join(
                self.data_ingestion_dir, "dataset", TRAIN_FILE_NAME
            )
            self.test_file_path = os.path.join(
                self.data_ingestion_dir, "dataset", TEST_FILE_NAME
            )
            # Fraction of rows held out for the test split.
            self.test_size = 0.2
        except Exception as e:
            raise CropException(e, sys)

    def to_dict(self) -> dict:
        """Expose the config as a plain dict (e.g. for logging/reporting)."""
        try:
            return self.__dict__
        except Exception as e:
            raise CropException(e, sys)


class DataValidationConfig:
    """Settings for drift/missing-value validation against a base dataset."""

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        self.data_validation_dir = os.path.join(
            training_pipeline_config.artifact_dir, "data_validation"
        )
        self.report_file_path = os.path.join(self.data_validation_dir, "report.yaml")
        # Columns with more than this fraction of missing values are dropped.
        self.missing_threshold = 0.2
        # Reference dataset the ingested data is validated against.
        self.base_file_path = os.path.join(
            "crop-recommendation-dataset/Crop_recommendation.csv"
        )


class DataTransformationConfig:
    """Output locations for the transformer, encoder and transformed arrays."""

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        self.data_transformation_dir = os.path.join(
            training_pipeline_config.artifact_dir, "data_transformation"
        )
        self.transform_object_path = os.path.join(
            self.data_transformation_dir,
            "transformer",
            TRANSFORMER_OBJECT_FILE_NAME
        )
        self.transformed_train_path = os.path.join(
            self.data_transformation_dir,
            "transformed",
            TRAIN_FILE_NAME.replace("csv", "npz"),
        )
        self.transformed_test_path = os.path.join(
            self.data_transformation_dir,
            "transformed",
            TEST_FILE_NAME.replace("csv", "npz"),
        )
        self.target_encoder_path = os.path.join(
            self.data_transformation_dir,
            "target_encoder",
            TARGET_ENCODER_OBJECT_FILE_NAME,
        )


class ModelTrainerConfig:
    """Model output path plus acceptance thresholds for the trainer."""

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        self.model_trainer_dir = os.path.join(
            training_pipeline_config.artifact_dir, "model_trainer"
        )
        self.model_path = os.path.join(self.model_trainer_dir, "model", MODEL_FILE_NAME)
        # Minimum acceptable weighted F1 on the test split.
        self.expected_score = 0.9
        # Maximum allowed |train F1 - test F1| gap.
        self.overfitting_threshold = 0.1


class ModelEvaluationConfig:
    """Settings for comparing the new model against the saved one."""

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        # NOTE(review): change_threshold is not referenced by ModelEvaluation
        # in this codebase — confirm whether it is dead configuration.
        self.change_threshold = 0.01


class ModelPusherConfig:
    """Destinations for publishing an accepted model."""

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        self.model_pusher_dir = os.path.join(
            training_pipeline_config.artifact_dir, "model_pusher"
        )
        # Shared, versioned registry root (outside the per-run artifact dir).
        self.saved_model_dir = os.path.join("saved_models")
        # Run-local copy of the pushed objects.
        self.pusher_model_dir = os.path.join(self.model_pusher_dir, "saved_models")
        self.pusher_model_path = os.path.join(self.pusher_model_dir, MODEL_FILE_NAME)
        self.pusher_transformer_path = os.path.join(
            self.pusher_model_dir, TRANSFORMER_OBJECT_FILE_NAME
        )
        self.pusher_target_encoder_path = os.path.join(
            self.pusher_model_dir, TARGET_ENCODER_OBJECT_FILE_NAME
        )
|
crop-recommendation/src/exception.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
|
3 |
+
|
4 |
+
def error_message_detail(error, error_detail: sys):
|
5 |
+
_, _, exc_tb = error_detail.exc_info()
|
6 |
+
file_name = exc_tb.tb_frame.f_code.co_filename
|
7 |
+
error_message = "Error occurred python script name [{0}] line number [{1}] error message [{2}]".format(
|
8 |
+
file_name, exc_tb.tb_lineno, str(error)
|
9 |
+
)
|
10 |
+
|
11 |
+
return error_message
|
12 |
+
|
13 |
+
|
14 |
+
class CropException(Exception):
|
15 |
+
def __init__(self, error_message, error_detail: sys):
|
16 |
+
self.error_message = error_message_detail(
|
17 |
+
error_message, error_detail=error_detail
|
18 |
+
)
|
19 |
+
|
20 |
+
def __str__(self):
|
21 |
+
return self.error_message
|
crop-recommendation/src/logger.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
import os
from datetime import datetime

# Log file name, unique per process start (month-day-year__hour-minute-second).
LOG_FILE_NAME = f"{datetime.now().strftime('%m%d%Y__%H%M%S')}.log"

# Directory that collects all log files, relative to the working directory.
LOG_FILE_DIR = os.path.join(os.getcwd(), "logs")

# create folder if not available
os.makedirs(LOG_FILE_DIR, exist_ok=True)

# Full path of this run's log file.
LOG_FILE_PATH = os.path.join(LOG_FILE_DIR, LOG_FILE_NAME)


# Configure the root logger once at import time; every module that does
# ``from src.logger import logging`` shares this file handler and format.
logging.basicConfig(
    filename=LOG_FILE_PATH,
    format="[ %(asctime)s ] %(filename)s - %(lineno)d %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
|
crop-recommendation/src/pipeline/__init__.py
ADDED
File without changes
|
crop-recommendation/src/pipeline/training_pipeline.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.logger import logging
|
2 |
+
from src.exception import CropException
|
3 |
+
from src.utils import get_collection_as_dataframe
|
4 |
+
from src.entity import config_entity
|
5 |
+
from src.entity import artifact_entity
|
6 |
+
import sys
|
7 |
+
from src.components.data_ingestion import DataIngestion
|
8 |
+
from src.components.data_validation import DataValidation
|
9 |
+
from src.components.data_trasformation import DataTransformation
|
10 |
+
from src.components.model_trainer import ModelTrainer
|
11 |
+
from src.components.model_evaluation import ModelEvaluation
|
12 |
+
from src.components.model_pusher import ModelPusher
|
13 |
+
|
14 |
+
|
15 |
+
def start_training_pipeline():
    """Run the full training pipeline end to end.

    Stages: ingestion -> validation -> transformation -> training ->
    evaluation -> pushing. Any failure is printed (the original best-effort
    behavior is preserved) rather than propagated.
    """
    try:
        training_pipeline_config = config_entity.TrainingPipelineConfig()

        # data ingestion
        data_ingestion_config = config_entity.DataIngestionConfig(
            training_pipeline_config=training_pipeline_config
        )
        data_ingestion_config.to_dict()

        data_ingestion = DataIngestion(data_ingestion_config=data_ingestion_config)
        data_ingestion_artifact = data_ingestion.initiate_data_ingestion()

        print(f"Data Ingestion complete")

        # data validation
        data_validation_config = config_entity.DataValidationConfig(
            training_pipeline_config=training_pipeline_config
        )

        data_validation = DataValidation(
            data_validation_config=data_validation_config,
            data_ingestion_artifact=data_ingestion_artifact,
        )

        data_validation.initiate_data_validation()
        print(f"Data Validation Complete")

        # data transformation
        data_transformation_config = config_entity.DataTransformationConfig(
            training_pipeline_config=training_pipeline_config
        )

        data_transformation = DataTransformation(
            data_transformation_config=data_transformation_config,
            data_ingestion_artifact=data_ingestion_artifact,
        )

        data_transformation_artifact = (
            data_transformation.initiate_data_transformation()
        )
        print(f"Data Transformation Complete")

        # model trainer
        model_trainer_config = config_entity.ModelTrainerConfig(
            training_pipeline_config=training_pipeline_config
        )

        model_trainer = ModelTrainer(
            model_trainer_config=model_trainer_config,
            data_transformation_artifact=data_transformation_artifact,
        )

        model_trainer_artifact = model_trainer.initiate_model_trainer()
        print(f"Model Training Complete")

        # model evaluation
        model_eval_config = config_entity.ModelEvaluationConfig(
            training_pipeline_config=training_pipeline_config
        )
        model_eval = ModelEvaluation(
            model_eval_config=model_eval_config,
            data_ingesiton_artifact=data_ingestion_artifact,
            data_transformation_artifact=data_transformation_artifact,
            model_trainer_artifact=model_trainer_artifact,
        )
        model_eval_artifact = model_eval.initiate_model_evaluation()
        print(f"Model Evaluation Complete")

        # Model Pusher
        model_pusher_config = config_entity.ModelPusherConfig(
            training_pipeline_config=training_pipeline_config
        )

        # BUG FIX: the data-transformation *config* object was previously
        # passed where ModelPusher expects the data-transformation *artifact*,
        # which would fail as soon as the pusher reads transform_object_path.
        model_pusher = ModelPusher(
            model_pusher_config=model_pusher_config,
            data_transformation_artifact=data_transformation_artifact,
            model_trainer_artifact=model_trainer_artifact,
        )

        model_pusher_artifact = model_pusher.initiate_model_pusher()
        print(f"Model Pusher Complete")

    except Exception as e:
        print(e)
|
crop-recommendation/src/predictor.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.entity.config_entity import TRANSFORMER_OBJECT_FILE_NAME
|
2 |
+
from src.entity.config_entity import MODEL_FILE_NAME
|
3 |
+
from src.entity.config_entity import TARGET_ENCODER_OBJECT_FILE_NAME
|
4 |
+
from src.exception import CropException
|
5 |
+
from src.logger import logging
|
6 |
+
|
7 |
+
import os
|
8 |
+
import sys
|
9 |
+
|
10 |
+
from glob import glob
|
11 |
+
from typing import Optional
|
12 |
+
|
13 |
+
|
14 |
+
class ModelResolver:
    """Resolves file-system paths inside the versioned model registry.

    The registry is a directory whose children are integer-named version
    folders (``0``, ``1``, ``2`` ...); the highest number is the latest.
    ``get_latest_*`` methods read the newest version; ``get_latest_save_*``
    methods return paths inside the NEXT version folder to write into.
    """

    def __init__(
        self,
        model_registry: str = "saved_models",
        transformer_dir_name="transformer",
        target_encoder_dir_name="target_encoder",
        model_dir_name="model",
    ):
        self.model_registry = model_registry
        os.makedirs(self.model_registry, exist_ok=True)

        self.transformer_dir_name = transformer_dir_name
        self.target_encoder_dir_name = target_encoder_dir_name
        self.model_dir_name = model_dir_name

    def get_latest_dir_path(self) -> Optional[str]:
        """Return the highest-numbered version dir, or None if the registry is empty."""
        try:
            dir_names = os.listdir(self.model_registry)
            if len(dir_names) == 0:
                return None
            latest_dir_name = max(map(int, dir_names))
            return os.path.join(self.model_registry, f"{latest_dir_name}")

        except Exception as e:
            raise CropException(e, sys)

    def get_latest_model_path(self):
        """Path of the latest saved model; raises if none exists."""
        try:
            latest_dir = self.get_latest_dir_path()
            if latest_dir is None:
                raise Exception(f"Model is not available")
            return os.path.join(latest_dir, self.model_dir_name, MODEL_FILE_NAME)
        except Exception as e:
            raise CropException(e, sys)

    def get_latest_transformer_path(self):
        """Path of the latest saved transformer; raises if none exists."""
        try:
            latest_dir = self.get_latest_dir_path()
            if latest_dir is None:
                # Fix: corrected misspelled error message ("availabel").
                raise Exception(f"Transformer is not available")
            return os.path.join(
                latest_dir, self.transformer_dir_name, TRANSFORMER_OBJECT_FILE_NAME
            )
        except Exception as e:
            raise CropException(e, sys)

    def get_latest_target_encoder_path(self):
        """Path of the latest saved target encoder; raises if none exists."""
        try:
            latest_dir = self.get_latest_dir_path()
            if latest_dir is None:
                raise Exception(f"Target encoder is not available")

            return os.path.join(
                latest_dir, self.target_encoder_dir_name, TARGET_ENCODER_OBJECT_FILE_NAME
            )

        except Exception as e:
            raise CropException(e, sys)

    def get_latest_save_dir_path(self):
        """Directory for the NEXT version (0 when the registry is empty)."""
        try:
            latest_dir = self.get_latest_dir_path()
            if latest_dir is None:
                return os.path.join(self.model_registry, f"{0}")
            # Fix: reuse latest_dir instead of scanning the registry a
            # second time via another get_latest_dir_path() call.
            latest_dir_num = int(os.path.basename(latest_dir))
            return os.path.join(self.model_registry, f"{latest_dir_num + 1}")
        except Exception as e:
            raise CropException(e, sys)

    def get_latest_save_model_path(self):
        """Destination path for the next model version."""
        try:
            latest_dir = self.get_latest_save_dir_path()
            return os.path.join(latest_dir, self.model_dir_name, MODEL_FILE_NAME)
        except Exception as e:
            raise CropException(e, sys)

    def get_latest_save_transformer_path(self):
        """Destination path for the next transformer version."""
        try:
            latest_dir = self.get_latest_save_dir_path()
            return os.path.join(
                latest_dir, self.transformer_dir_name, TRANSFORMER_OBJECT_FILE_NAME
            )
        except Exception as e:
            raise CropException(e, sys)

    def get_latest_save_target_encoder_path(self):
        """Destination path for the next target-encoder version."""
        try:
            latest_dir = self.get_latest_save_dir_path()
            return os.path.join(
                latest_dir, self.target_encoder_dir_name, TARGET_ENCODER_OBJECT_FILE_NAME
            )
        except Exception as e:
            raise CropException(e, sys)
|
crop-recommendation/src/utils.py
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from src.logger import logging
|
3 |
+
from src.exception import CropException
|
4 |
+
from src.config import mongo_client
|
5 |
+
import os
|
6 |
+
import sys
|
7 |
+
import numpy as np
|
8 |
+
import yaml
|
9 |
+
import dill
|
10 |
+
|
11 |
+
|
12 |
+
def get_collection_as_dataframe(
    database_name: str, collection_name: str
) -> pd.DataFrame:
    """Fetch every document of a MongoDB collection as a pandas DataFrame.

    The MongoDB-internal ``_id`` column is dropped when present.

    Args:
        database_name: name of the MongoDB database.
        collection_name: name of the collection inside that database.

    Returns:
        DataFrame with one row per document.
    """
    try:
        logging.info(
            f"Reading data from database: {database_name} and collection: {collection_name}"
        )
        documents = mongo_client[database_name][collection_name].find()
        df = pd.DataFrame(list(documents))
        logging.info(f"{database_name} found in the mongodb")

        if "_id" in df.columns:
            logging.info("Dropping column: '_id'")
            df = df.drop(columns=["_id"])
        logging.info(f"Row and columns in df: {df.shape}")
        return df
    except Exception as e:
        raise CropException(e, sys)
|
38 |
+
|
39 |
+
|
40 |
+
def seperate_dependant_column(df: pd.DataFrame, exclude_column: list) -> pd.DataFrame:
    """Return a copy of *df* without the columns listed in *exclude_column*."""
    remaining = df.drop(columns=exclude_column)
    return remaining
|
44 |
+
|
45 |
+
|
46 |
+
def write_yaml_file(file_path, data: dict):
    """Serialize *data* as YAML at *file_path*, creating parent directories."""
    try:
        parent_dir = os.path.dirname(file_path)
        os.makedirs(parent_dir, exist_ok=True)

        with open(file_path, "w") as handle:
            yaml.dump(data, handle)
    except Exception as e:
        raise CropException(e, sys)
|
55 |
+
|
56 |
+
|
57 |
+
def save_object(file_path: str, obj: object) -> None:
    """Serialize *obj* to *file_path* with dill, creating parent directories."""
    try:
        logging.info("Entered the save object method of utils")
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, "wb") as sink:
            dill.dump(obj, sink)
        logging.info("Exited the save object method of utils")
    except Exception as e:
        raise CropException(e, sys)
|
66 |
+
|
67 |
+
|
68 |
+
def load_object(file_path: str) -> object:
    """Load and return a dill-serialized object from *file_path*."""
    try:
        if not os.path.exists(file_path):
            raise Exception(f"The file: {file_path} is not exists")
        with open(file_path, "rb") as source:
            return dill.load(source)
    except Exception as e:
        raise CropException(e, sys)
|
76 |
+
|
77 |
+
|
78 |
+
def save_numpy_array_data(file_path: str, array: np.array):
    """Save a numpy array to *file_path*, creating parent directories.

    Args:
        file_path: destination file location.
        array: numpy array data to save.

    Raises:
        CropException: wrapping any I/O failure.
    """
    try:
        dir_path = os.path.dirname(file_path)
        os.makedirs(dir_path, exist_ok=True)

        # BUG FIX: the handle was bound as ``file_ojb`` (typo) while np.save
        # referenced ``file_obj``, raising NameError on every call.
        with open(file_path, "wb") as file_obj:
            np.save(file_obj, array)

    except Exception as e:
        raise CropException(e, sys)
|
93 |
+
|
94 |
+
|
95 |
+
def load_numpy_array_data(file_path: str) -> np.array:
    """Load and return a numpy array previously saved at *file_path*."""
    try:
        with open(file_path, "rb") as source:
            return np.load(source, allow_pickle=True)

    except Exception as e:
        raise CropException(e, sys)
|