diff --git a/.github/workflows/sync-to-huggingface.yml b/.github/workflows/sync-to-huggingface.yml new file mode 100644 index 0000000000000000000000000000000000000000..ec7f2e3453a38f1acb5a1f66282ecb18cba8392f --- /dev/null +++ b/.github/workflows/sync-to-huggingface.yml @@ -0,0 +1,21 @@ +name: Sync to Hugging Face hub +on: + push: + branches: [main] + + # to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + sync-to-hub: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + fetch-depth: 0 + lfs: true + - name: Push to hub + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: git push --force https://Sadashiv:$HF_TOKEN@huggingface.co/spaces/Sadashiv/CropGaurd main diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ee665b33b4ecd93fda77e44ca7c8e9d8ee54772e --- /dev/null +++ b/.gitignore @@ -0,0 +1,164 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +main.py +extra_notebook +notebok \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..c8a9f49a75bb6d4229e37376a66dc5bf124e569f --- /dev/null +++ b/.gitmodules @@ -0,0 +1,9 @@ +[submodule "Fertilizer-Recommendation"] + path = Fertilizer-Recommendation + url = https://github.com/07Sada/Fertilizer-Recommendation.git +[submodule "crop-recommendation"] + path = crop-recommendation + url = https://github.com/07Sada/crop-recommendation.git +[submodule "plant-diseases-classifier"] + path = plant-diseases-classifier + url = https://github.com/07Sada/plant-diseases-classifier.git diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 0000000000000000000000000000000000000000..f2443f916752e0f49a4510f1a3fc4761e34b4f45 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,10 @@ +{ + "recommendations": [ + "mongodb.mongodb-vscode", + "ms-python.python", + "ms-toolsai.jupyter", + "ms-toolsai.jupyter-keymap", + "ms-toolsai.jupyter-renderers", + 
"formulahendry.code-runner" + ] +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000000000000000000000000000000000000..7ecf6fd89f583d7735afa16d25efae8c4a8d4c0a --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,8 @@ +{ + "workbench.colorTheme": "Cobalt2", + "workbench.preferredDarkColorTheme": "Default Dark+", + "task.allowAutomaticTasks": "on", + "workbench.editorAssociations": { + "*.md": "vscode.markdown.preview.editor" + } +} diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000000000000000000000000000000000000..df60023539b2e7a04dd3b9cf841b61cc8b851cc6 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,15 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Installing extensions and dependencies...", + "type": "shell", + "command": "code-server --install-extension mongodb.mongodb-vscode --install-extension ms-python.python --install-extension formulahendry.code-runner && pip install -r requirements.txt", + "presentation": { + "reveal": "always", + "panel": "new" + }, + "runOptions": { "runOn": "folderOpen" } + } + ] +} diff --git a/Fertilizer-Recommendation/.gitignore b/Fertilizer-Recommendation/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..df58e4f5bdca3651c16cb73851db7a1f35fe2b57 --- /dev/null +++ b/Fertilizer-Recommendation/.gitignore @@ -0,0 +1,166 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ +data_dump.py +dataset_download.py +demo.ipynb + +artifact +catboost_info \ No newline at end of file diff --git a/Fertilizer-Recommendation/.vscode/extensions.json b/Fertilizer-Recommendation/.vscode/extensions.json new file mode 100644 index 0000000000000000000000000000000000000000..c9c6c690032684d0217fd13c78d2f91ac478c219 --- /dev/null +++ b/Fertilizer-Recommendation/.vscode/extensions.json @@ -0,0 +1,13 @@ +{ + "recommendations": [ + "mongodb.mongodb-vscode", + "ms-python.python", + "ms-toolsai.jupyter", + "ms-toolsai.jupyter-keymap", + "ms-toolsai.jupyter-renderers", + "formulahendry.code-runner", + "wesbos.theme-cobalt2", + "PKief.material-icon-theme", + "wesbos.theme-cobalt2" + ] +} diff --git a/Fertilizer-Recommendation/.vscode/settings.json b/Fertilizer-Recommendation/.vscode/settings.json new file mode 100644 index 0000000000000000000000000000000000000000..c77dce512bea97b77c181731955c0e95e8e202fc --- /dev/null +++ b/Fertilizer-Recommendation/.vscode/settings.json @@ -0,0 +1,8 @@ +{ + "workbench.colorTheme": "Cobalt2", + 
"workbench.preferredDarkColorTheme": "Cobalt2", + "task.allowAutomaticTasks": "on", + "workbench.editorAssociations": { + "*.md": "vscode.markdown.preview.editor" + } +} diff --git a/Fertilizer-Recommendation/.vscode/tasks.json b/Fertilizer-Recommendation/.vscode/tasks.json new file mode 100644 index 0000000000000000000000000000000000000000..df60023539b2e7a04dd3b9cf841b61cc8b851cc6 --- /dev/null +++ b/Fertilizer-Recommendation/.vscode/tasks.json @@ -0,0 +1,15 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Installing extensions and dependencies...", + "type": "shell", + "command": "code-server --install-extension mongodb.mongodb-vscode --install-extension ms-python.python --install-extension formulahendry.code-runner && pip install -r requirements.txt", + "presentation": { + "reveal": "always", + "panel": "new" + }, + "runOptions": { "runOn": "folderOpen" } + } + ] +} diff --git a/Fertilizer-Recommendation/LICENSE b/Fertilizer-Recommendation/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..e121f2e91d7901e575e89d72fbae0276c0409c4e --- /dev/null +++ b/Fertilizer-Recommendation/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Sadashiv Nandanikar + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Fertilizer-Recommendation/README.md b/Fertilizer-Recommendation/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e29c75b46ee7417da6f8053736aec25e1e244cba --- /dev/null +++ b/Fertilizer-Recommendation/README.md @@ -0,0 +1,43 @@ +# Fertilizer Recommendation +### Powered by machine learning models, this project evaluates input factors to provide precise fertilizer recommendations, enhancing crop health and productivity. + +## Demo +### Input Interface +Image 1 + +### Output Interface +Image 1 + +## Data Source +This dataset contains information about the soil, environmental conditions, crop type, and fertilizer use for different crops. The dataset includes the following columns: + +- `Temparature`: The temperature in degrees Celsius. +- `Humidity`: The relative humidity in percent. +- `Moisture`: The moisture content of the soil in percent. +- `Soil Type`: The type of soil. +- `Crop Type`: The type of crop. +- `Nitrogen`: The amount of nitrogen in the soil in kilograms per hectare. +- `Potassium`: The amount of potassium in the soil in kilograms per hectare. +- `Phosphorous`: The amount of phosphorus in the soil in kilograms per hectare. +- `Fertilizer Name`: The name of the fertilizer used. + +[Link](https://www.kaggle.com/datasets/gdabhishek/fertilizer-prediction) to the dataset + +
+ Supported fertilizers + + +- UREA +- DAP +- 14-35-14 +- 28-28 +- 17-17-17 +- 20-20 +- 10-26-26 +
+ +## Project Details +This repository is a submodule of [CropGaurd](https://github.com/07Sada/CropGaurd.git) + +## Project Pipeline Stages +![Project Pipeline Stages](https://user-images.githubusercontent.com/112761379/225940480-2a7381b2-6abd-4c1c-8287-0fd49099be8c.jpg) diff --git a/Fertilizer-Recommendation/fertilizer-prediction/Fertilizer Prediction.csv b/Fertilizer-Recommendation/fertilizer-prediction/Fertilizer Prediction.csv new file mode 100644 index 0000000000000000000000000000000000000000..f3cb521b378fab010faf565d130a2fb070b158de --- /dev/null +++ b/Fertilizer-Recommendation/fertilizer-prediction/Fertilizer Prediction.csv @@ -0,0 +1,100 @@ +Temparature,Humidity ,Moisture,Soil Type,Crop Type,Nitrogen,Potassium,Phosphorous,Fertilizer Name +26,52,38,Sandy,Maize,37,0,0,Urea +29,52,45,Loamy,Sugarcane,12,0,36,DAP +34,65,62,Black,Cotton,7,9,30,14-35-14 +32,62,34,Red,Tobacco,22,0,20,28-28 +28,54,46,Clayey,Paddy,35,0,0,Urea +26,52,35,Sandy,Barley,12,10,13,17-17-17 +25,50,64,Red,Cotton,9,0,10,20-20 +33,64,50,Loamy,Wheat,41,0,0,Urea +30,60,42,Sandy,Millets,21,0,18,28-28 +29,58,33,Black,Oil seeds,9,7,30,14-35-14 +27,54,28,Clayey,Pulses,13,0,40,DAP +31,62,48,Sandy,Maize,14,15,12,17-17-17 +25,50,65,Loamy,Cotton,36,0,0,Urea +32,62,41,Clayey,Paddy,24,0,22,28-28 +26,52,31,Red,Ground Nuts,14,0,41,DAP +31,62,49,Black,Sugarcane,10,13,14,17-17-17 +33,64,34,Clayey,Pulses,38,0,0,Urea +25,50,39,Sandy,Barley,21,0,19,28-28 +28,54,65,Black,Cotton,39,0,0,Urea +29,58,52,Loamy,Wheat,13,0,36,DAP +30,60,44,Sandy,Millets,10,0,9,20-20 +34,65,53,Loamy,Sugarcane,12,14,12,17-17-17 +35,68,33,Red,Tobacco,11,0,37,DAP +28,54,37,Black,Millets,36,0,0,Urea +33,64,39,Clayey,Paddy,13,0,10,20-20 +26,52,44,Sandy,Maize,23,0,20,28-28 +30,60,63,Red,Cotton,9,9,29,14-35-14 +32,62,30,Loamy,Sugarcane,38,0,0,Urea +37,70,32,Black,Oil seeds,12,0,39,DAP +26,52,36,Clayey,Pulses,14,0,13,20-20 +29,58,40,Red,Ground Nuts,24,0,23,28-28 +30,60,27,Loamy,Sugarcane,12,0,40,DAP +34,65,38,Clayey,Paddy,39,0,0,Urea
+36,68,38,Sandy,Barley,7,9,30,14-35-14 +26,52,48,Loamy,Wheat,23,0,19,28-28 +28,54,35,Black,Millets,41,0,0,Urea +30,60,61,Loamy,Cotton,8,10,31,14-35-14 +37,70,37,Clayey,Paddy,12,0,41,DAP +25,50,26,Red,Ground Nuts,15,14,11,17-17-17 +29,58,34,Sandy,Millets,15,0,37,DAP +27,54,30,Clayey,Pulses,13,0,13,20-20 +30,60,58,Loamy,Sugarcane,10,7,32,14-35-14 +32,62,34,Red,Tobacco,22,0,24,28-28 +34,65,60,Black,Sugarcane,35,0,0,Urea +35,67,42,Sandy,Barley,10,0,35,DAP +38,70,48,Loamy,Wheat,8,8,28,14-35-14 +26,52,32,Black,Oil seeds,12,0,8,20-20 +29,58,43,Clayey,Paddy,24,0,18,28-28 +30,60,29,Red,Ground Nuts,41,0,0,Urea +33,64,51,Sandy,Maize,5,9,29,14-35-14 +34,65,31,Red,Tobacco,23,0,21,28-28 +36,68,33,Black,Oil seeds,13,0,14,20-20 +28,54,38,Clayey,Pulses,40,0,0,Urea +30,60,47,Sandy,Barley,12,0,42,DAP +31,62,63,Red,Cotton,11,12,15,17-17-17 +27,53,43,Black,Millets,23,0,24,28-28 +34,65,54,Loamy,Wheat,38,0,0,Urea +29,58,37,Sandy,Millets,8,0,15,20-20 +25,50,56,Loamy,Sugarcane,11,13,15,17-17-17 +32,62,34,Red,Ground Nuts,15,0,37,DAP +28,54,41,Clayey,Paddy,36,0,0,Urea +30,60,49,Loamy,Wheat,13,0,9,20-20 +34,65,64,Black,Cotton,24,0,20,28-28 +28,54,47,Sandy,Barley,5,18,15,10-26-26 +27,53,35,Black,Oil seeds,37,0,0,Urea +36,68,62,Red,Cotton,15,0,40,DAP +34,65,57,Black,Sugarcane,9,0,13,20-20 +29,58,55,Loamy,Sugarcane,8,8,33,14-35-14 +25,50,40,Clayey,Pulses,6,19,16,10-26-26 +30,60,38,Sandy,Millets,10,0,14,20-20 +26,52,39,Clayey,Pulses,21,0,23,28-28 +31,62,32,Red,Tobacco,39,0,0,Urea +34,65,48,Loamy,Wheat,23,0,19,28-28 +27,53,34,Black,Oil seeds,42,0,0,Urea +33,64,31,Red,Ground Nuts,13,0,39,DAP +29,58,42,Clayey,Paddy,9,10,22,14-35-14 +30,60,47,Sandy,Maize,22,0,21,28-28 +27,53,59,Loamy,Sugarcane,10,0,15,20-20 +26,52,36,Clayey,Pulses,7,16,20,10-26-26 +34,65,63,Red,Cotton,14,0,38,DAP +28,54,43,Clayey,Paddy,10,8,29,14-35-14 +30,60,40,Sandy,Millets,41,0,0,Urea +29,58,65,Black,Cotton,14,0,35,DAP +26,52,59,Loamy,Sugarcane,11,0,9,20-20 +31,62,44,Sandy,Barley,21,0,28,28-28 
+35,67,28,Clayey,Pulses,8,7,31,14-35-14 +29,58,30,Red,Tobacco,13,17,16,10-26-26 +27,53,30,Black,Millets,35,0,0,Urea +36,68,50,Loamy,Wheat,12,18,19,10-26-26 +29,58,61,Loamy,Cotton,11,0,38,DAP +30,60,26,Black,Oil seeds,8,9,30,14-35-14 +34,65,45,Clayey,Paddy,6,19,21,10-26-26 +36,68,41,Red,Ground Nuts,41,0,0,Urea +28,54,25,Sandy,Maize,9,10,30,14-35-14 +25,50,32,Clayey,Pulses,24,0,19,28-28 +30,60,27,Red,Tobacco,4,17,17,10-26-26 +38,72,51,Loamy,Wheat,39,0,0,Urea +36,60,43,Sandy,Millets,15,0,41,DAP +29,58,57,Black,Sugarcane,12,0,10,20-20 diff --git a/Fertilizer-Recommendation/main.py b/Fertilizer-Recommendation/main.py new file mode 100644 index 0000000000000000000000000000000000000000..fd1cae98b9797ac75b945bd5fa30517b44b16ad7 --- /dev/null +++ b/Fertilizer-Recommendation/main.py @@ -0,0 +1,8 @@ +from src.pipeline.training_pipeline import start_training_pipeline + +if __name__ =="__main__": + try: + start_training_pipeline() + + except Exception as e: + print(e) \ No newline at end of file diff --git a/Fertilizer-Recommendation/notebook/fertilizer-prediction.ipynb b/Fertilizer-Recommendation/notebook/fertilizer-prediction.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..482baef1d318c5e62a0104df3c3fcc55a7aed756 --- /dev/null +++ b/Fertilizer-Recommendation/notebook/fertilizer-prediction.ipynb @@ -0,0 +1,736 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np \n", + "import matplotlib.pyplot as plt \n", + "import seaborn as sns\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.preprocessing import LabelEncoder\n", + "from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score\n", + "\n", + "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier\n", + "from sklearn.tree 
import DecisionTreeClassifier\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from xgboost import XGBClassifier\n", + "from catboost import CatBoostClassifier\n", + "\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "import warnings\n", + "\n", + "# Ignore warnings\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.chdir(\"/config/workspace\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TemparatureHumidityMoistureSoil TypeCrop TypeNitrogenPotassiumPhosphorousFertilizer Name
0265238SandyMaize3700Urea
1295245LoamySugarcane12036DAP
2346562BlackCotton793014-35-14
3326234RedTobacco2202028-28
4285446ClayeyPaddy3500Urea
\n", + "
" + ], + "text/plain": [ + " Temparature Humidity Moisture Soil Type Crop Type Nitrogen Potassium \\\n", + "0 26 52 38 Sandy Maize 37 0 \n", + "1 29 52 45 Loamy Sugarcane 12 0 \n", + "2 34 65 62 Black Cotton 7 9 \n", + "3 32 62 34 Red Tobacco 22 0 \n", + "4 28 54 46 Clayey Paddy 35 0 \n", + "\n", + " Phosphorous Fertilizer Name \n", + "0 0 Urea \n", + "1 36 DAP \n", + "2 30 14-35-14 \n", + "3 20 28-28 \n", + "4 0 Urea " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "FILE_PATH =r\"fertilizer-prediction/Fertilizer Prediction.csv\"\n", + "\n", + "# Loading the dataset into pandas\n", + "df = pd.read_csv(FILE_PATH)\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape of the dataset: (99, 9)\n" + ] + } + ], + "source": [ + "print(f\"Shape of the dataset: {df.shape}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 99 entries, 0 to 98\n", + "Data columns (total 9 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Temparature 99 non-null int64 \n", + " 1 Humidity 99 non-null int64 \n", + " 2 Moisture 99 non-null int64 \n", + " 3 Soil Type 99 non-null object\n", + " 4 Crop Type 99 non-null object\n", + " 5 Nitrogen 99 non-null int64 \n", + " 6 Potassium 99 non-null int64 \n", + " 7 Phosphorous 99 non-null int64 \n", + " 8 Fertilizer Name 99 non-null object\n", + "dtypes: int64(6), object(3)\n", + "memory usage: 7.1+ KB\n" + ] + } + ], + "source": [ + "# datatypes \n", + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Temparature 0\n", + "Humidity 0\n", + "Moisture 0\n", + "Soil Type 0\n", + "Crop Type 
0\n", + "Nitrogen 0\n", + "Potassium 0\n", + "Phosphorous 0\n", + "Fertilizer Name 0\n", + "dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# checking for null values \n", + "df.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# checking the data distribution \n", + "plt.figure(figsize=(10, 7))\n", + "\n", + "sns.countplot(data=df, x='Fertilizer Name')\n", + "plt.title(\"Dataset Distribution\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Sandy', 'Loamy', 'Black', 'Red', 'Clayey'], dtype=object)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# soil type\n", + "df['Soil Type'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Maize', 'Sugarcane', 'Cotton', 'Tobacco', 'Paddy', 'Barley',\n", + " 'Wheat', 'Millets', 'Oil seeds', 'Pulses', 'Ground Nuts'],\n", + " dtype=object)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Crop Type'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape of the training dataset: (79, 8)\n", + "Shape of the testing dataset: (20, 8)\n" + ] + } + ], + "source": [ + "# splitting the dataset \n", + "X = df.drop(columns=[\"Fertilizer Name\"])\n", + "y = df[\"Fertilizer Name\"]\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "print(f\"Shape of the training dataset: {X_train.shape}\")\n", + "print(f\"Shape of the testing dataset: {X_test.shape}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Temparature', 'Humidity ', 'Moisture', 'Nitrogen', 'Potassium', 'Phosphorous']\n" + ] + } + ], + "source": [ + "# numerical columns in the dataset\n", 
+ "print(df._get_numeric_data().columns.tolist())" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Soil Type', 'Crop Type']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# segrating categorical columns\n", + "categorical_columns = [i for i in df.columns if (i not in df._get_numeric_data().columns) & (i !='Fertilizer Name')]\n", + "categorical_columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Encoding" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "ohe = OneHotEncoder(drop='first')\n", + "standard_scaler = StandardScaler()\n", + "\n", + "preprocessor = ColumnTransformer(\n", + " transformers =[\n", + " ('StandaradScaling', standard_scaler, df._get_numeric_data().columns),\n", + " ('One_hot_encoding', ohe, categorical_columns)\n", + " ],\n", + " remainder='passthrough'\n", + ")\n", + "\n", + "pipeline = Pipeline([\n", + " ('preprocess', preprocessor)\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "X_train_transformed = pipeline.fit_transform(X_train)\n", + "X_test_transformed = pipeline.transform(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "le = LabelEncoder()\n", + "\n", + "y_train_transformed = le.fit_transform(y_train)\n", + "y_test_transformed = le.transform(y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "def evaluate_clf(true, predicted):\n", + " '''\n", + " This function takes in true values and predicted values\n", + " Returns: Accuracy, F1-Score, Precision, Recall, Roc-auc Score\n", + " '''\n", + " acc = accuracy_score(true, predicted)\n", + " f1 = f1_score(true, predicted, 
average='weighted')\n", + " precision = precision_score(true, predicted, average='weighted')\n", + " recall = recall_score(true, predicted, average='weighted')\n", + " \n", + " return acc, f1, precision, recall" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# create a function which can evaluate models and returns a report \n", + "def evaluate_model(X_train, X_test, y_train, y_test, models):\n", + " '''\n", + " This function takes X_train, X_test, y_train, y_test and models dictionary as input\n", + " Iterate through the given model directory and evaluate metrics\n", + "\n", + " Returns:\n", + " DataFrame which contains report of all models metrics \n", + " '''\n", + "\n", + " model_list = []\n", + " metric_list = []\n", + "\n", + " for i in range(len(list(models))):\n", + " model = list(models.values())[i]\n", + " model.fit(X_train, y_train)\n", + "\n", + " # Make predictions\n", + " y_train_pred = model.predict(X_train)\n", + " y_test_pred = model.predict(X_test)\n", + "\n", + " # Training set performances\n", + " model_train_accuracy, model_train_f1, model_train_precision, \\\n", + " model_train_recall = evaluate_clf(y_train, y_train_pred)\n", + "\n", + " # Test set peformances \n", + " model_test_accuracy, model_test_f1, model_test_precision, \\\n", + " model_test_recall = evaluate_clf(y_test, y_test_pred)\n", + "\n", + " print(list(models.keys())[i])\n", + " model_list.append(list(models.keys())[i])\n", + "\n", + " result_dict ={'model_name':list(models.keys())[i], \n", + " \"train_accuracy\": model_train_accuracy, \"test_accuracy\": model_test_accuracy,\n", + " \"train_precision\": model_train_precision, \"test_precision\": model_test_precision,\n", + " 'train_recall': model_train_recall, \"test_recall\":model_test_recall,\n", + " \"train_f1_score\": model_train_f1, \"test_f1_score\": model_test_f1}\n", + "\n", + " metric_list.append(result_dict)\n", + "\n", + " \n", + " return metric_list\n" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Model Dictionary\n", + "models = {\n", + " \"Random Forest\": RandomForestClassifier(),\n", + " \"Decision Tree\": DecisionTreeClassifier(),\n", + " \"Gradient Boosting\": GradientBoostingClassifier(),\n", + " \"K-Neighbors Classifier\": KNeighborsClassifier(),\n", + " \"XGBClassifier\": XGBClassifier(), \n", + " \"CatBoosting Classifier\": CatBoostClassifier(verbose=False),\n", + " \"AdaBoost Classifier\": AdaBoostClassifier()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Random Forest\n", + "Decision Tree\n", + "Gradient Boosting\n", + "K-Neighbors Classifier\n", + "XGBClassifier\n", + "CatBoosting Classifier\n", + "AdaBoost Classifier\n" + ] + } + ], + "source": [ + "resultant_metrics = evaluate_model(X_train_transformed, X_test_transformed, y_train_transformed, y_test_transformed, models)\n", + "\n", + "resultant_metrics_df = pd.DataFrame(data=resultant_metrics)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
model_nametrain_accuracytest_accuracytrain_precisiontest_precisiontrain_recalltest_recalltrain_f1_scoretest_f1_score
1Decision Tree1.0000001.001.0000001.0000001.0000001.001.0000001.000000
4XGBClassifier1.0000001.001.0000001.0000001.0000001.001.0000001.000000
5CatBoosting Classifier1.0000001.001.0000001.0000001.0000001.001.0000001.000000
0Random Forest1.0000000.951.0000001.0000001.0000000.951.0000000.966667
2Gradient Boosting1.0000000.951.0000000.9750001.0000000.951.0000000.955556
6AdaBoost Classifier0.5949370.700.4779180.6571430.5949370.700.5041470.662500
3K-Neighbors Classifier0.8987340.650.9045390.6666670.8987340.650.8975990.647727
\n", + "
" + ], + "text/plain": [ + " model_name train_accuracy test_accuracy train_precision \\\n", + "1 Decision Tree 1.000000 1.00 1.000000 \n", + "4 XGBClassifier 1.000000 1.00 1.000000 \n", + "5 CatBoosting Classifier 1.000000 1.00 1.000000 \n", + "0 Random Forest 1.000000 0.95 1.000000 \n", + "2 Gradient Boosting 1.000000 0.95 1.000000 \n", + "6 AdaBoost Classifier 0.594937 0.70 0.477918 \n", + "3 K-Neighbors Classifier 0.898734 0.65 0.904539 \n", + "\n", + " test_precision train_recall test_recall train_f1_score test_f1_score \n", + "1 1.000000 1.000000 1.00 1.000000 1.000000 \n", + "4 1.000000 1.000000 1.00 1.000000 1.000000 \n", + "5 1.000000 1.000000 1.00 1.000000 1.000000 \n", + "0 1.000000 1.000000 0.95 1.000000 0.966667 \n", + "2 0.975000 1.000000 0.95 1.000000 0.955556 \n", + "6 0.657143 0.594937 0.70 0.504147 0.662500 \n", + "3 0.666667 0.898734 0.65 0.897599 0.647727 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "resultant_metrics_df = resultant_metrics_df.sort_values(by='test_f1_score', ascending=False)\n", + "resultant_metrics_df" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.10 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Fertilizer-Recommendation/requirements.txt b/Fertilizer-Recommendation/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..106abbc3ee684d63b3c7962d66e7e60e7487515b --- /dev/null +++ b/Fertilizer-Recommendation/requirements.txt @@ -0,0 +1,58 @@ 
+asttokens==2.2.1 +backcall==0.2.0 +bleach==6.0.0 +certifi==2023.7.22 +charset-normalizer==3.2.0 +click==8.1.6 +comm==0.1.3 +debugpy==1.6.7 +decorator==5.1.1 +dnspython==2.4.1 +executing==1.2.0 +idna==3.4 +importlib-metadata==6.8.0 +ipykernel==6.25.0 +ipython==8.12.2 +jedi==0.18.2 +jupyter-client==8.3.0 +jupyter-core==5.3.1 +kaggle==1.5.16 +matplotlib-inline==0.1.6 +nest-asyncio==1.5.6 +numpy==1.24.4 +opendatasets==0.1.22 +packaging==23.1 +pandas==2.0.3 +parso==0.8.3 +pexpect==4.8.0 +pickleshare==0.7.5 +platformdirs==3.9.1 +prompt-toolkit==3.0.39 +psutil==5.9.5 +ptyprocess==0.7.0 +pure-eval==0.2.2 +Pygments==2.15.1 +pymongo==4.4.1 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-slugify==8.0.1 +pytz==2023.3 +pyzmq==25.1.0 +requests==2.31.0 +six==1.16.0 +stack-data==0.6.2 +text-unidecode==1.3 +tornado==6.3.2 +tqdm==4.65.0 +traitlets==5.9.0 +typing-extensions==4.7.1 +tzdata==2023.3 +urllib3==2.0.4 +wcwidth==0.2.6 +webencodings==0.5.1 +zipp==3.16.2 +scikit-learn +matplotlib +seaborn +pyyaml +dill \ No newline at end of file diff --git a/Fertilizer-Recommendation/saved_models/0/model/model.pkl b/Fertilizer-Recommendation/saved_models/0/model/model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..14f2fc61359f5b05ee0433428a11030698e66505 --- /dev/null +++ b/Fertilizer-Recommendation/saved_models/0/model/model.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c89526de70ba6f924e11e82a344eb581e001228120c575fc73372179b91297ed +size 2808 diff --git a/Fertilizer-Recommendation/saved_models/0/target_encoder/target_encoder.pkl b/Fertilizer-Recommendation/saved_models/0/target_encoder/target_encoder.pkl new file mode 100644 index 0000000000000000000000000000000000000000..b0f0d194e2c65bcfcad15e2d4b55be5577b20789 --- /dev/null +++ b/Fertilizer-Recommendation/saved_models/0/target_encoder/target_encoder.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:60d651fde77ec9ae2d704442e566129721089259fef449d7e81620ac286ddc9d +size 338 diff --git a/Fertilizer-Recommendation/saved_models/0/transformer/transformer.pkl b/Fertilizer-Recommendation/saved_models/0/transformer/transformer.pkl new file mode 100644 index 0000000000000000000000000000000000000000..df26c4d70d4171bfe27bc6951498fa67017c79bb --- /dev/null +++ b/Fertilizer-Recommendation/saved_models/0/transformer/transformer.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:427b030f88db787db36098d667cd6fb75e6e6a1d8bb6b504d47d2124b3a10a20 +size 2323 diff --git a/Fertilizer-Recommendation/src/__init__.py b/Fertilizer-Recommendation/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Fertilizer-Recommendation/src/app.py b/Fertilizer-Recommendation/src/app.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Fertilizer-Recommendation/src/components/__init__.py b/Fertilizer-Recommendation/src/components/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Fertilizer-Recommendation/src/components/data_ingestion.py b/Fertilizer-Recommendation/src/components/data_ingestion.py new file mode 100644 index 0000000000000000000000000000000000000000..b54ff1a966cc9a6407e37647cb59cda80c9ae94b --- /dev/null +++ b/Fertilizer-Recommendation/src/components/data_ingestion.py @@ -0,0 +1,69 @@ +from src.entity import config_entity +from src.entity import artifact_entity +from src.logger import logging +from src.exception import FertilizerException +from src import utils + +from sklearn.model_selection import train_test_split +import numpy as np +import pandas as pd +import sys +import os + +class DataIngestion: + + def __init__(self, data_ingestion_config:config_entity.DataIngestionConfig): + try: + 
logging.info(f"\n\n{'>'*50} Data Ingestion {'<'*50}\n") + self.data_ingestion_config = data_ingestion_config + + except Exception as e: + raise FertilizerException(e, sys) + + def initiate_data_ingestion(self) -> artifact_entity.DataIngestionArtifact: + try: + logging.info(f"Exporting collection data as pandas Dataframe ") + + df: pd.DataFrame = utils.get_collection_as_dataframe( + database_name=self.data_ingestion_config.database_name, + collection_name=self.data_ingestion_config.collection_name) + + logging.info(f"Saving data in feature store") + + feature_store_dir = os.path.dirname(self.data_ingestion_config.feature_store_file_path) + os.makedirs(feature_store_dir, exist_ok=True) + + logging.info(f"Saving dataframe into feature store") + df.to_csv(path_or_buf=self.data_ingestion_config.feature_store_file_path, + index=False, + header=True) + + logging.info(f"Split the dataset into train and test") + train_df, test_df = train_test_split( + df, test_size=self.data_ingestion_config.test_size, random_state=42 + ) + + logging.info(f"Create dataset directory if not available") + dataset_dir = os.path.dirname(self.data_ingestion_config.train_file_path) + os.makedirs(dataset_dir, exist_ok=True) + + logging.info(f"Save df to feature store folder") + train_df.to_csv(path_or_buf=self.data_ingestion_config.train_file_path, + index=False, + header=True) + + test_df.to_csv(path_or_buf=self.data_ingestion_config.test_file_path, + index=False, + header=True) + + data_ingestion_artifact = artifact_entity.DataIngestionArtifact( + feature_store_file_path=self.data_ingestion_config.feature_store_file_path, + train_file_path=self.data_ingestion_config.train_file_path, + test_file_path=self.data_ingestion_config.test_file_path) + + logging.info(f"Data Ingestion Completed. 
Artifacts saved") + + return data_ingestion_artifact + + except Exception as e: + raise FertilizerException(e, sys) \ No newline at end of file diff --git a/Fertilizer-Recommendation/src/components/data_transformation.py b/Fertilizer-Recommendation/src/components/data_transformation.py new file mode 100644 index 0000000000000000000000000000000000000000..8332c8e15c51f9cb311eb60ceee8bf4c166e6598 --- /dev/null +++ b/Fertilizer-Recommendation/src/components/data_transformation.py @@ -0,0 +1,116 @@ +from src.entity import config_entity +from src.entity import artifact_entity +from src.logger import logging +from src.exception import FertilizerException +from typing import Optional +from src import utils +import sys +import os +from src.config import TARGET_COLUMN +from src.config import NUMERICAL_FEATURES +from src.config import CATEGORICAL_FEATURES +from src.config import BASE_FILE_PATH + +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler +from sklearn.preprocessing import LabelEncoder +from sklearn.preprocessing import OneHotEncoder +from sklearn.compose import ColumnTransformer +import pandas as pd +import numpy as np + +class DataTransformation: + + def __init__( + self, + data_transformation_config:config_entity.DataTransformationConfig, + data_ingestion_artifact:artifact_entity.DataIngestionArtifact + ): + + try: + logging.info(f"\n\n{'>'*50} Data transformation Initiated {'<'*50}\n") + self.data_transformation_config = data_transformation_config + self.data_ingestion_artifact = data_ingestion_artifact + + except Exception as e: + raise FertilizerException(e, sys) + + @classmethod + def get_data_transformer_object(cls) -> Pipeline: + try: + standard_scaler = StandardScaler() + one_hot_encoder = OneHotEncoder(drop='first') + + numerical_indices, categorical_indices = utils.get_column_indices( + numerical_features=NUMERICAL_FEATURES, + categorical_features=CATEGORICAL_FEATURES, + base_file_path=BASE_FILE_PATH) + + preprocessor = 
ColumnTransformer( + transformers =[ + ('StandaradScaling', standard_scaler, numerical_indices), + ('One_hot_encoding', one_hot_encoder, categorical_indices) + ], + remainder='passthrough' + ) + + pipeline = Pipeline([ + ('preprocess', preprocessor) + ]) + + return pipeline + + except Exception as e: + raise FertilizerException(e, sys) + + def initiate_data_transformation(self) -> artifact_entity.DataTransformationArtifact: + try: + # reading training and testing files + train_df = pd.read_csv(self.data_ingestion_artifact.train_file_path) + test_df = pd.read_csv(self.data_ingestion_artifact.test_file_path) + + # selecting input features for train and test dataframe + input_feature_train_df = train_df.drop(TARGET_COLUMN, axis=1) + input_feature_test_df = test_df.drop(TARGET_COLUMN, axis=1) + + # selecting target feature for train and test dataframe + target_feature_train_df = train_df[TARGET_COLUMN] + target_feature_test_df = test_df[TARGET_COLUMN] + + label_encoder = LabelEncoder() + label_encoder.fit(target_feature_train_df) + + # transformation on target column + target_feature_train_arr = label_encoder.transform(target_feature_train_df) + target_feature_test_arr = label_encoder.transform(target_feature_test_df) + + # transforming input features + transformation_pipeline = DataTransformation.get_data_transformer_object() + transformation_pipeline.fit(input_feature_train_df) + + input_feature_train_arr = transformation_pipeline.transform(input_feature_train_df) + input_feature_test_arr = transformation_pipeline.transform(input_feature_test_df) + + train_arr = np.c_[input_feature_train_arr, target_feature_train_arr] + test_arr = np.c_[input_feature_test_arr, target_feature_test_arr] + + # save the numpy array + utils.save_object(file_path=self.data_transformation_config.transformed_train_path, obj=train_arr) + utils.save_object(file_path=self.data_transformation_config.transformed_test_path, obj=test_arr) + + 
utils.save_object(file_path=self.data_transformation_config.transform_object_path, obj=transformation_pipeline) + + utils.save_object(file_path=self.data_transformation_config.target_encoder_path, obj=label_encoder) + + data_transformation_artifact = artifact_entity.DataTransformationArtifact( + transform_object_path = self.data_transformation_config.transform_object_path, + transformed_train_path = self.data_transformation_config.transformed_train_path, + transformed_test_path = self.data_transformation_config.transformed_test_path, + target_encoder_path = self.data_transformation_config.target_encoder_path) + + logging.info(f"Data transformation Completed") + + return data_transformation_artifact + + except Exception as e: + raise FertilizerException(e, sys) diff --git a/Fertilizer-Recommendation/src/components/data_validation.py b/Fertilizer-Recommendation/src/components/data_validation.py new file mode 100644 index 0000000000000000000000000000000000000000..adfde621624e832192dd53ff4629ab285caa47fd --- /dev/null +++ b/Fertilizer-Recommendation/src/components/data_validation.py @@ -0,0 +1,145 @@ +from src.entity import config_entity +from src.entity import artifact_entity +from src.logger import logging +from src.exception import FertilizerException +from src.config import TARGET_COLUMN +from src import utils + +from typing import Optional +from scipy.stats import ks_2samp +import pandas as pd +import numpy as np +import os +import sys + +class DataValidation: + + def __init__( + self, + data_validation_config: config_entity.DataValidationConfig, + data_ingestion_arfitact: artifact_entity.DataIngestionArtifact + ): + + try: + logging.info(f"\n\n{'>'*50} Data Validation Initiated {'<'*50}\n") + self.data_validation_config = data_validation_config + self.data_ingestion_arfitact = data_ingestion_arfitact + self.validation_error = dict() + + except Exception as e: + raise FertilizerException(e, sys) + + def is_required_colums_exists( + self, + base_df: pd.DataFrame, + 
current_df: pd.DataFrame, + report_key_name: str + ) -> bool: + try: + base_columns = base_df.columns + current_columns = current_df.columns + + missing_columns = [] + for base_column in base_columns: + if base_column not in current_columns: + logging.info(f"Column: {base_column} is not available") + missing_columns.append(base_column) + + if len(missing_columns) > 0: + self.validation_error[report_key_name] = missing_columns + return False + + return True + + except Exception as e: + raise FertilizerException(e, sys) + + def data_drift( + self, + base_df: pd.DataFrame, + current_df: pd.DataFrame, + report_key_name: str + ): + try: + drift_report = dict() + + base_columns = base_df.columns + current_columns = current_df.columns + + for base_column in base_columns: + base_data, current_data = base_df[base_column], current_df[base_column] + + # Null hypothesis is that both column data drawn from same distribution + + logging.info(f"Hypothesis {base_column}: {base_data.dtype}, {current_data.dtype}") + same_distribution = ks_2samp(base_data, current_data) + + if same_distribution.pvalue > 0.05: + # we are accepting the null hypothesis + drift_report[base_column] = { + "pvalue": float(same_distribution.pvalue), + "same_distribution":True + } + + else: + drift_report[base_column] = { + "pvalue": float(same_distribution.pvalue), + "same_distribution":False + } + self.validation_error[report_key_name] = drift_report + + except Exception as e: + raise FertilizerException(e, sys) + + def initiate_data_validation(self) -> artifact_entity.DataValidationArtifact: + try: + logging.info(f"Reading base dataframe") + base_df = pd.read_csv(self.data_validation_config.base_file_path) + + logging.info(f"Reading train dataframe") + train_df = pd.read_csv(self.data_ingestion_arfitact.train_file_path) + + logging.info(f"Reading test dataframe") + test_df = pd.read_csv(self.data_ingestion_arfitact.test_file_path) + + exclude_column = [TARGET_COLUMN] + base_df = 
utils.seperate_dependant_column(df=base_df, exclude_column=exclude_column) + train_df = utils.seperate_dependant_column(df=train_df, exclude_column=exclude_column) + test_df = utils.seperate_dependant_column(df=test_df, exclude_column=exclude_column) + + logging.info(f"Is all required columns present in the train_df") + train_df_columns_status = self.is_required_colums_exists( + base_df=base_df, + current_df=train_df, + report_key_name='missing_columns_within_train_dataset') + + test_df_columns_status = self.is_required_colums_exists( + base_df=base_df, + current_df=test_df, + report_key_name='missing_columns_within_test_dataset') + + if train_df_columns_status: + logging.info(f"As all colum are availabel in train_df hence detecting data drift") + + self.data_drift(base_df=base_df, current_df=train_df, report_key_name='data_drift_within_train_dataset') + + if test_df_columns_status: + logging.info(f"As all columns are availabel in test_df hence detecting data drift") + + self.data_drift(base_df=base_df, current_df=test_df, report_key_name='data_drift_within_test_dataset') + + # writting the report + logging.info(f"Writing report in yaml format") + utils.write_yaml_file( + file_path=self.data_validation_config.report_file_path, + data=self.validation_error) + + data_validation_artifact = artifact_entity.DataValidationArtifact( + report_file_path=self.data_validation_config.report_file_path) + + logging.info(f"Data Vadidation Completed. 
Artifacts saved") + + return data_validation_artifact + + except Exception as e: + raise FertilizerException(e, sys) \ No newline at end of file diff --git a/Fertilizer-Recommendation/src/components/model_evaluation.py b/Fertilizer-Recommendation/src/components/model_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..962b8814530f4465d7f5ba3f3a3e73f40ba67cfd --- /dev/null +++ b/Fertilizer-Recommendation/src/components/model_evaluation.py @@ -0,0 +1,108 @@ +from src.predictor import ModelResolver +from src.entity import config_entity +from src.entity import artifact_entity +from src.logger import logging +from src.exception import FertilizerException +from src.utils import load_object + +from src.config import TARGET_COLUMN + +from sklearn.metrics import f1_score +import pandas as pd +import numpy as np +import os +import sys + +class ModelEvaluation: + + def __init__( + self, + model_eval_config: config_entity.ModelEvaluationConfig, + data_ingestion_artifact: artifact_entity.DataIngestionArtifact, + data_transformation_artifact: artifact_entity.DataTransformationArtifact, + model_trainer_artifact: artifact_entity.ModelTrainerArtifact + ): + + try: + logging.info(f"\n\n{'>'*50} Model Evaluation Initiated {'<'*50}\n") + self.model_eval_config = model_eval_config + self.data_ingestion_artifact = data_ingestion_artifact + self.data_transformation_artifact = data_transformation_artifact + self.model_trainer_artifact = model_trainer_artifact + self.model_resolver = ModelResolver() + + except Exception as e: + raise FertilizerException(e, sys) + + + def initiate_model_evaluation(self) -> artifact_entity.ModelEvaluationArtifact: + try: + logging.info(f"If the saved model directory contains a model, we will compare which model is best trained:\ + the model from the saved model folder or the new model." 
+ ) + + latest_dir_path = self.model_resolver.get_latest_dir_path() + if latest_dir_path is None: + model_eval_artifact = artifact_entity.ModelEvaluationArtifact(is_model_accepted=True, improved_accuracy=None) + + logging.info(f"Model Evaluation Artifacts: {model_eval_artifact}") + return model_eval_artifact + + # finding location of transformer, model, and target encoder + logging.info(f"Finding location of transformer, model and target encoder") + transformer_path = self.model_resolver.get_latest_transformer_path() + + model_path = self.model_resolver.get_latest_model_path() + + target_encoder_path = self.model_resolver.get_latest_target_encoder_path() + + # loading the previously saved transformer, model and target encoder + logging.info(f"Previous trained objects of transformer, model and target encoder") + transformer = load_object(file_path=transformer_path) + model = load_object(file_path=model_path) + target_encoder = load_object(file_path=target_encoder_path) + + # loading the currently trained transformer, model and target encoder + logging.info(f"Currently trained model objects") + current_transformer = load_object(file_path=self.data_transformation_artifact.transform_object_path) + + current_model = load_object(file_path=self.model_trainer_artifact.model_path) + + current_target_encoder = load_object(file_path=self.data_transformation_artifact.target_encoder_path) + + # fetching the testing data + test_df = pd.read_csv(self.data_ingestion_artifact.test_file_path) + target_df = test_df[TARGET_COLUMN] + + y_true = target_encoder.transform(target_df) + + # weighted F1 of the previously saved model, scored with its own transformer, model and target encoder + input_feature_name = list(transformer.feature_names_in_) + input_arr = transformer.transform(test_df[input_feature_name]) + + y_pred = model.predict(input_arr) + y_true = target_encoder.transform(target_df) + + previous_model_score = f1_score(y_true=y_true, y_pred=y_pred, average='weighted') + + # accuracy using current model + input_feature_name = 
list(current_transformer.feature_names_in_) + input_arr = current_transformer.transform(test_df[input_feature_name]) + + y_pred = current_model.predict(input_arr) + y_true = current_target_encoder.transform(target_df) + + current_model_score = f1_score(y_true=y_true, y_pred=y_pred, average='weighted') + + if current_model_score <= previous_model_score: + logging.info(f"Current trained model is not better than previous model") + raise Exception("Current trained model is not better than previous model") + + model_eval_artifact = artifact_entity.ModelEvaluationArtifact(is_model_accepted=True, + improved_accuracy = current_model_score - previous_model_score) + + logging.info(f"Model Eval Artifacts generated") + return model_eval_artifact + + except Exception as e: + raise FertilizerException(e, sys) \ No newline at end of file diff --git a/Fertilizer-Recommendation/src/components/model_pusher.py b/Fertilizer-Recommendation/src/components/model_pusher.py new file mode 100644 index 0000000000000000000000000000000000000000..f31af6670a332924f7cbe43106cfa7c0fd80f09e --- /dev/null +++ b/Fertilizer-Recommendation/src/components/model_pusher.py @@ -0,0 +1,71 @@ +from src.entity import config_entity +from src.entity import artifact_entity +from src.logger import logging +from src.exception import FertilizerException +from src.predictor import ModelResolver +from src.utils import load_object +from src.utils import save_object + +from src.entity.config_entity import ModelPusherConfig + +from src.entity.artifact_entity import DataTransformationArtifact +from src.entity.artifact_entity import ModelTrainerArtifact +from src.entity.artifact_entity import ModelPusherArtifact + +import os +import sys + +class ModelPusher: + + def __init__( + self, + model_pusher_config: ModelPusherConfig, + data_transformation_artifact: DataTransformationArtifact, + model_trainer_artifact: ModelTrainerArtifact + ): + + try: + logging.info(f"\n\n{'>'*50} Model Pusher Initiated {'<'*50}\n") + 
def initiate_model_pusher(self) -> ModelPusherArtifact:
    """Copy the trained transformer, model and target encoder to their final homes.

    The three objects are loaded from the paths recorded in the data
    transformation and model trainer artifacts, written to the model pusher
    directory, and then written to the paths that ``ModelResolver`` reports
    as the next save locations.

    Returns:
        ModelPusherArtifact holding the pusher directory and saved-model directory.

    Raises:
        FertilizerException: wraps any error raised while loading or saving.
    """
    try:
        pusher_config = self.model_pusher_config
        transformation_artifact = self.data_transformation_artifact
        resolver = self.model_resolver

        # Load the objects produced by the current run.
        logging.info("Loading transformer model and target encoder")
        fitted_transformer = load_object(
            file_path=transformation_artifact.transform_object_path
        )
        trained_model = load_object(file_path=self.model_trainer_artifact.model_path)
        fitted_target_encoder = load_object(
            file_path=transformation_artifact.target_encoder_path
        )

        # First copy: the model pusher directory.
        logging.info("Saving model into model pusher directory")
        save_object(
            file_path=pusher_config.pusher_transformer_path, obj=fitted_transformer
        )
        save_object(file_path=pusher_config.pusher_model_path, obj=trained_model)
        save_object(
            file_path=pusher_config.pusher_target_encoder_path,
            obj=fitted_target_encoder,
        )

        # Second copy: the saved model directory, at resolver-provided paths.
        logging.info("Saving model in saved model dir")
        saved_transformer_path = resolver.get_latest_save_transformer_path()
        saved_model_path = resolver.get_latest_save_model_path()
        saved_target_encoder_path = resolver.get_latest_save_target_encoder_path()

        save_object(file_path=saved_transformer_path, obj=fitted_transformer)
        save_object(file_path=saved_model_path, obj=trained_model)
        save_object(file_path=saved_target_encoder_path, obj=fitted_target_encoder)

        pusher_artifact = ModelPusherArtifact(
            pusher_model_dir=pusher_config.pusher_model_dir,
            saved_model_dir=pusher_config.saved_model_dir,
        )

        logging.info("Model Pusher Artifacts Generated")
        return pusher_artifact

    except Exception as e:
        raise FertilizerException(e, sys)
class ModelTrainer:
    """Train a decision-tree classifier on the transformed data and vet its scores."""

    def __init__(
        self,
        model_trainer_config: config_entity.ModelTrainerConfig,
        data_transformation_artifact: artifact_entity.DataTransformationArtifact,
    ):
        """Keep the trainer config and the data transformation artifact.

        Raises:
            FertilizerException: wraps any error raised while setting up.
        """
        try:
            logging.info(f"\n\n{'>'*50} Model Trainer Initiated {'<'*50}\n")
            self.model_trainer_config = model_trainer_config
            self.data_transformation_artifact = data_transformation_artifact

        except Exception as e:
            raise FertilizerException(e, sys) from e

    def train_model(self, X, y):
        """Fit a ``DecisionTreeClassifier`` with default settings on ``X``/``y`` and return it.

        Raises:
            FertilizerException: wraps any error raised while fitting.
        """
        try:
            decision_tree_classifier = DecisionTreeClassifier()
            decision_tree_classifier.fit(X, y)

            return decision_tree_classifier

        except Exception as e:
            raise FertilizerException(e, sys) from e

    def initial_model_trainer(self) -> artifact_entity.ModelTrainerArtifact:
        """Train the model, check it for under/overfitting and save it.

        The last column of the transformed train/test arrays is treated as
        the target; all other columns are the input features.

        Returns:
            ModelTrainerArtifact with the saved model path and the weighted
            F1 scores on the train and test arrays.

        Raises:
            FertilizerException: wraps any failure, including a test score
                below the expected score or a train/test gap above the
                overfitting threshold.
        """
        try:
            logging.info("Loading train and test array")

            train_arr = utils.load_numpy_array_data(
                file_path=self.data_transformation_artifact.transformed_train_path
            )
            test_arr = utils.load_numpy_array_data(
                file_path=self.data_transformation_artifact.transformed_test_path
            )

            logging.info("Splitting the input and target feature from both train and test arr")

            X_train, y_train = train_arr[:, :-1], train_arr[:, -1]
            X_test, y_test = test_arr[:, :-1], test_arr[:, -1]

            logging.info("Training the model")
            model = self.train_model(X=X_train, y=y_train)

            logging.info("Calculating the f1 train score")
            yhat_train = model.predict(X_train)
            f1_train_score = f1_score(y_true=y_train, y_pred=yhat_train, average="weighted")

            logging.info("Calculating the f1 test score")
            yhat_test = model.predict(X_test)
            f1_test_score = f1_score(y_true=y_test, y_pred=yhat_test, average="weighted")

            logging.info(f"train_score : {f1_train_score} and test_score : {f1_test_score}")

            # Underfitting guard: the test score must reach the expected score.
            # The earlier code compared against overfitting_threshold here while
            # reporting expected_score in the error message.
            # NOTE(review): assumes ModelTrainerConfig defines expected_score, as
            # the original error message already read it - confirm.
            logging.info("Checking if our model is underfitting or not")
            expected_score = self.model_trainer_config.expected_score
            if f1_test_score < expected_score:
                raise Exception(
                    "Model is not good, as it is not able to give "
                    f"expected accuracy: {expected_score}, "
                    f"model actual score: {f1_test_score}"
                )

            # Overfitting guard: train and test scores must stay close.
            logging.info("Checking if our model is overfitting or not")
            diff = abs(f1_train_score - f1_test_score)
            overfitting_threshold = self.model_trainer_config.overfitting_threshold
            if diff > overfitting_threshold:
                raise Exception(
                    f"Train and test score diff: {diff} "
                    f"is more than overfitting threshold: {overfitting_threshold}"
                )

            # save the trained model
            logging.info("Saving model object")
            utils.save_object(file_path=self.model_trainer_config.model_path, obj=model)

            # prepare the artifact
            logging.info("Prepare the artifact")
            model_trainer_artifact = artifact_entity.ModelTrainerArtifact(
                model_path=self.model_trainer_config.model_path,
                f1_train_score=f1_train_score,
                # The artifact field is named f2_test_score but carries the F1 test score.
                f2_test_score=f1_test_score,
            )

            logging.info("Model Trainer Complete, Artifact Generated")

            return model_trainer_artifact

        except Exception as e:
            raise FertilizerException(e, sys) from e
@dataclass
class ModelTrainerArtifact:
    """Result record produced by the model-training stage."""

    # Path the trained model object was saved to.
    model_path: str
    # Weighted F1 score measured on the training split.
    f1_train_score: float
    # NOTE(review): despite the "f2" prefix, ModelTrainer populates this field
    # with the weighted F1 score of the test split. The name is kept because
    # callers pass it by keyword.
    f2_test_score: float
class TrainingPipelineConfig:
    """Root configuration of one training run.

    Attributes:
        artifact_dir: ``<cwd>/artifact/<mmddYYYY__HHMMSS>``, a per-run
            directory name built from the current working directory and the
            time of construction.
    """

    def __init__(self):
        try:
            artifact_root = os.path.join(os.getcwd(), "artifact")
            run_stamp = datetime.now().strftime("%m%d%Y__%H%M%S")
            self.artifact_dir = os.path.join(artifact_root, run_stamp)
        except Exception as e:
            raise FertilizerException(e, sys)
class ModelPusherConfig:
    """Paths used by the model-pusher stage.

    Attributes:
        model_pusher_dir: ``<artifact_dir>/model_pusher``.
        saved_model_dir: the relative directory ``saved_models``.
        pusher_model_dir: ``<model_pusher_dir>/saved_models``.
        pusher_model_path: model file inside ``pusher_model_dir``.
        pusher_transformer_path: transformer file inside ``pusher_model_dir``.
        pusher_target_encoder_path: target-encoder file inside
            ``pusher_model_dir``.
    """

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        stage_dir = os.path.join(training_pipeline_config.artifact_dir, "model_pusher")
        staged_models_dir = os.path.join(stage_dir, "saved_models")

        self.model_pusher_dir = stage_dir
        self.saved_model_dir = "saved_models"
        self.pusher_model_dir = staged_models_dir
        self.pusher_model_path = os.path.join(staged_models_dir, MODEL_FILE_NAME)
        self.pusher_transformer_path = os.path.join(
            staged_models_dir, TRANSFORMER_OBJECT_FILE_NAME
        )
        self.pusher_target_encoder_path = os.path.join(
            staged_models_dir, TARGET_ENCODER_OBJECT_FILE_NAME
        )
def error_message_detail(error, error_detail: sys):
    """Build a human-readable message for ``error``.

    Args:
        error: the exception (or message) being reported.
        error_detail: the ``sys`` module; its ``exc_info()`` supplies the
            traceback of the exception currently being handled.

    Returns:
        A message containing the script name and line number taken from the
        active traceback plus ``str(error)``. When no exception is being
        handled (no traceback available) only the error text is included
        instead of failing with ``AttributeError``.
    """
    _, _, exc_tb = error_detail.exc_info()

    if exc_tb is None:
        # Called outside an ``except`` block: there is no location to report.
        return "Error occurred error message [{0}]".format(str(error))

    file_name = exc_tb.tb_frame.f_code.co_filename
    error_message = "Error occurred python script name [{0}] line number [{1}] error message [{2}]".format(
        file_name, exc_tb.tb_lineno, str(error)
    )

    return error_message


class FertilizerException(Exception):
    """Project exception whose text carries file and line information."""

    def __init__(self, error_message, error_detail: sys):
        """Wrap ``error_message`` with location details.

        Args:
            error_message: the original exception or message.
            error_detail: the ``sys`` module (used for ``exc_info()``).
        """
        self.error_message = error_message_detail(
            error_message, error_detail=error_detail
        )
        # Initialise the base class explicitly so ``args`` holds the formatted
        # message rather than the raw error and the ``sys`` module.
        super().__init__(self.error_message)

    def __str__(self):
        return self.error_message
def start_training_pipeline():
    """Run the full training pipeline, stage by stage.

    Stages, in order: data ingestion, data validation, data transformation,
    model training, model evaluation and model pushing. Every stage receives
    its own config built from one shared ``TrainingPipelineConfig`` and,
    where needed, the artifacts of earlier stages. Progress is printed after
    each stage.

    Raises:
        FertilizerException: wraps any error raised by a stage.
    """
    try:
        training_pipeline_config = config_entity.TrainingPipelineConfig()

        # data ingestion
        data_ingestion_config = config_entity.DataIngestionConfig(
            training_pipeline_config=training_pipeline_config
        )
        data_ingestion = DataIngestion(data_ingestion_config=data_ingestion_config)
        data_ingestion_artifact = data_ingestion.initiate_data_ingestion()
        print("Data Ingestion complete")

        # data validation
        data_validation_config = config_entity.DataValidationConfig(
            training_pipeline_config=training_pipeline_config
        )
        # NOTE(review): the keyword is spelled ``data_ingestion_arfitact``;
        # presumably this matches DataValidation's signature, which is not
        # visible here, so it is left unchanged.
        data_validation = DataValidation(
            data_validation_config=data_validation_config,
            data_ingestion_arfitact=data_ingestion_artifact,
        )
        data_validation.initiate_data_validation()
        print("Data Validation Complete")

        # data transformation
        data_transformation_config = config_entity.DataTransformationConfig(
            training_pipeline_config=training_pipeline_config
        )
        data_transformation = DataTransformation(
            data_transformation_config=data_transformation_config,
            data_ingestion_artifact=data_ingestion_artifact,
        )
        data_transformation_artifact = data_transformation.initiate_data_transformation()
        print("Data Transformation Complete")

        # model trainer
        model_trainer_config = config_entity.ModelTrainerConfig(
            training_pipeline_config=training_pipeline_config
        )
        model_trainer = ModelTrainer(
            model_trainer_config=model_trainer_config,
            data_transformation_artifact=data_transformation_artifact,
        )
        model_trainer_artifact = model_trainer.initial_model_trainer()
        print("Model Trainer Complete")

        # model evaluation
        model_evaluation_config = config_entity.ModelEvaluationConfig(
            training_pipeline_config=training_pipeline_config
        )
        model_evaluation = ModelEvaluation(
            model_eval_config=model_evaluation_config,
            data_ingestion_artifact=data_ingestion_artifact,
            data_transformation_artifact=data_transformation_artifact,
            model_trainer_artifact=model_trainer_artifact,
        )
        # The evaluation artifact is not consumed by this function.
        model_evaluation.initiate_model_evaluation()
        print("Model Evaluation Complete")

        # model pusher
        model_pusher_config = config_entity.ModelPusherConfig(
            training_pipeline_config=training_pipeline_config
        )
        model_pusher = ModelPusher(
            model_pusher_config=model_pusher_config,
            data_transformation_artifact=data_transformation_artifact,
            model_trainer_artifact=model_trainer_artifact,
        )
        # The pusher result is not consumed either; it is no longer bound to
        # ``model_trainer_artifact``, which used to shadow the trainer output.
        model_pusher.initiate_model_pusher()
        print("Model Pusher Complete")

    except Exception as e:
        raise FertilizerException(e, sys)
class ModelResolver:
    """Resolve versioned artifact paths inside a model registry directory.

    The registry contains one sub-directory per saved version, named with an
    integer (``0``, ``1``, ...). The highest number is the latest version and
    the next save location is ``latest + 1``. Registry entries that are not
    integer-named directories are ignored.
    """

    def __init__(
        self,
        model_registry: str = 'saved_models',
        transformer_dir_name = 'transformer',
        target_encoder_dir_name = 'target_encoder',
        model_dir_name = 'model'
    ):
        """Remember the registry layout and create the registry directory.

        Args:
            model_registry: root directory holding the numbered versions.
            transformer_dir_name: sub-directory holding the transformer.
            target_encoder_dir_name: sub-directory holding the target encoder.
            model_dir_name: sub-directory holding the model.
        """
        self.model_registry = model_registry
        os.makedirs(self.model_registry, exist_ok=True)

        self.transformer_dir_name = transformer_dir_name
        self.target_encoder_dir_name = target_encoder_dir_name
        self.model_dir_name = model_dir_name

    def get_latest_dir_path(self) -> Optional[str]:
        """Return the directory of the highest version, or ``None`` if none exists.

        Raises:
            FertilizerException: if the registry cannot be listed.
        """
        try:
            # Only integer-named directories are versions; stray files such as
            # ``.gitkeep`` must not break the lookup.
            versions = [
                int(name)
                for name in os.listdir(self.model_registry)
                if name.isascii()
                and name.isdigit()
                and os.path.isdir(os.path.join(self.model_registry, name))
            ]

            if not versions:
                return None

            return os.path.join(self.model_registry, f"{max(versions)}")

        except Exception as e:
            raise FertilizerException(e, sys)

    def get_latest_model_path(self):
        """Return the model file path of the latest version.

        Raises:
            FertilizerException: if no version exists.
        """
        try:
            latest_dir = self.get_latest_dir_path()

            if latest_dir is None:
                raise Exception(f"Model is not available")

            return os.path.join(latest_dir, self.model_dir_name, MODEL_FILE_NAME)

        except Exception as e:
            raise FertilizerException(e, sys)

    def get_latest_transformer_path(self):
        """Return the transformer file path of the latest version.

        Raises:
            FertilizerException: if no version exists.
        """
        try:
            latest_dir = self.get_latest_dir_path()
            if latest_dir is None:
                raise Exception(f"Transformer is not available")

            return os.path.join(latest_dir, self.transformer_dir_name, TRANSFORMER_OBJECT_FILE_NAME)

        except Exception as e:
            raise FertilizerException(e, sys)

    def get_latest_target_encoder_path(self):
        """Return the target-encoder file path of the latest version.

        Raises:
            FertilizerException: if no version exists.
        """
        try:
            latest_dir = self.get_latest_dir_path()
            if latest_dir is None:
                raise Exception(f"Target Encoder is not available")

            return os.path.join(latest_dir, self.target_encoder_dir_name, TARGET_ENCODER_OBJECT_FILE_NAME)

        except Exception as e:
            raise FertilizerException(e, sys)

    def get_latest_save_dir_path(self):
        """Return the directory the next version should be saved to.

        This is ``<registry>/0`` for an empty registry, otherwise
        ``<registry>/<latest + 1>``.
        """
        try:
            latest_dir = self.get_latest_dir_path()

            if latest_dir is None:
                return os.path.join(self.model_registry, f"{0}")

            # Reuse the path already resolved above instead of scanning the
            # registry a second time.
            latest_dir_num = int(os.path.basename(latest_dir))

            return os.path.join(self.model_registry, f"{latest_dir_num + 1}")

        except Exception as e:
            raise FertilizerException(e, sys)

    def get_latest_save_model_path(self):
        """Return the model file path inside the next save directory."""
        try:
            latest_dir = self.get_latest_save_dir_path()

            return os.path.join(latest_dir, self.model_dir_name, MODEL_FILE_NAME)

        except Exception as e:
            raise FertilizerException(e, sys)

    def get_latest_save_transformer_path(self):
        """Return the transformer file path inside the next save directory."""
        try:
            latest_dir = self.get_latest_save_dir_path()

            return os.path.join(latest_dir, self.transformer_dir_name, TRANSFORMER_OBJECT_FILE_NAME)

        except Exception as e:
            raise FertilizerException(e, sys)

    def get_latest_save_target_encoder_path(self):
        """Return the target-encoder file path inside the next save directory."""
        try:
            latest_dir = self.get_latest_save_dir_path()

            return os.path.join(latest_dir, self.target_encoder_dir_name, TARGET_ENCODER_OBJECT_FILE_NAME)

        except Exception as e:
            raise FertilizerException(e, sys)
def get_column_indices(numerical_features: list, categorical_features: list, base_file_path: str):
    """Return the positional indices of the given columns in a CSV file.

    Args:
        numerical_features: names of the numerical columns.
        categorical_features: names of the categorical columns.
        base_file_path: path of the CSV file whose header defines the
            column order.

    Returns:
        A tuple ``(numerical_feature_indices, categorical_feature_indices)``;
        each list holds the positions of the requested columns, in the order
        the names were given.

    Raises:
        KeyError: if a requested column is not present in the file.
    """
    # Only the header is needed to resolve positions, so no data rows are parsed.
    columns = pd.read_csv(base_file_path, nrows=0).columns

    numerical_feature_indices = [columns.get_loc(feature) for feature in numerical_features]
    categorical_feature_indices = [columns.get_loc(feature) for feature in categorical_features]

    return numerical_feature_indices, categorical_feature_indices
def save_numpy_array_data(file_path: str, array: np.array):
    """Save a numpy array to ``file_path`` in ``.npy`` format.

    Args:
        file_path: location of the file to write; missing parent
            directories are created.
        array: the data to save.

    Raises:
        FertilizerException: if the directory or file cannot be written.
    """
    try:
        dir_path = os.path.dirname(file_path)
        # A bare file name has no directory part; ``os.makedirs("")`` would fail.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)

        with open(file_path, "wb") as file_obj:
            np.save(file_obj, array)

    except Exception as e:
        raise FertilizerException(e, sys)


def load_numpy_array_data(file_path: str) -> np.array:
    """Load a numpy array previously written by ``save_numpy_array_data``.

    Args:
        file_path: location of the file to load.

    Returns:
        The array stored in the file.

    Raises:
        FertilizerException: if the file cannot be opened or parsed.
    """
    try:
        with open(file_path, "rb") as file_obj:
            # NOTE(security): ``allow_pickle=True`` can execute arbitrary code
            # when loading an untrusted file; only load pipeline-produced files.
            return np.load(file_obj, allow_pickle=True)

    except Exception as e:
        raise FertilizerException(e, sys)
# Create every path listed in ``list_of_files``: make the parent directory
# when there is one, then create an empty file unless a non-empty file
# already exists at that path.
for filepth in list_of_files:
    filepath = Path(filepth)
    filedir, _ = os.path.split(filepath)

    if filedir != "":
        os.makedirs(filedir, exist_ok=True)

    if (not os.path.exists(filepath)) or (os.path.getsize(filepath) == 0):
        # Missing or empty: (re)create it as an empty placeholder.
        with open(filepath, "w"):
            pass

    else:
        # The ``f`` prefix was missing, so the placeholder was logged literally.
        logging.info(f"file is already present at : {filepath}")
+ +It's important to note that this project primarily demonstrates the application of Machine Learning (ML) and Deep Learning (DL) concepts within precision farming. The hypothetical scenario presented here underscores the potential benefits of deploying ML/DL techniques on a larger scale, provided that authentic and verified data sources are used. + +For reliable and accurate farming decisions, always rely on verified agricultural data sources, expert advice, and industry standards. + +## Project Links + +- **Application Link:** Check out the live application on Hugging Face Spaces: [Application Link](https://huggingface.co/spaces/Sadashiv/CropGaurd) + +- **Demo Video:** For a visual walkthrough of the application's features, watch demo video: [Demo Video Link]() + +## Project Overview +The application integrates several key features to assist users in making informed decisions for their agricultural activities. These features include: + +- ***Crop Recommendation System:*** Leveraging advanced machine learning techniques, the system recommends suitable crops based on various factors such as soil chemical contents, and climate conditions. + +- ***Fertilizer Recommendation System:*** The application also offers personalized fertilizer recommendations, ensuring that crops receive the optimal nutrients for healthy growth and abundant yields. + +- ***Plant Disease Classification:*** By employing cutting-edge image classification models, incorporated a feature that enables users to detect and diagnose diseases in plants. Users can simply upload images of their plants, and our system will accurately identify any diseases present and provide relevant information about them. + +- ***Real-time Commodity Price Updates:*** To empower users with current market insights, we have integrated a government API that provides daily commodity prices across different Indian states. This information assists farmers and traders in making pricing and distribution decisions. 
+ +## Purpose +The aim of the project is to revolutionize the agricultural sector by offering data-driven solutions that enhance productivity, reduce risks, and promote sustainable practices. By amalgamating technology and agriculture, we strive to address critical challenges faced by farmers and contribute to the growth of the farming community. + +Whether you're a seasoned farmer seeking optimized strategies or an individual interested in sustainable agriculture, our application provides the tools you need to make well-informed decisions. + +## Additional Details +Here are some additional aspects of the project that contribute to its effectiveness and uniqueness: + +- ***Machine Learning Models:*** We have trained our recommendation and classification models on extensive datasets specific to Indian agriculture. This ensures that the recommendations and classifications are accurate and relevant to the local context. + +- ***User-Friendly Interface:*** Our web application boasts an intuitive and user-friendly interface designed to make navigation and interaction seamless, even for users with limited technological experience. + +- ***Informational Insights:*** Apart from recommendations, our application provides detailed information about recommended crops, fertilizers, and identified plant diseases. This information helps users understand the rationale behind the suggestions and take well-informed actions. + +- ***Scalability:*** Our project's architecture is designed to accommodate future expansions and enhancements. We are committed to continuously improving the application by incorporating user feedback and integrating emerging technologies. + +## Getting Started +- Clone or download the parent repository from [GitHub Repository Link](https://github.com/07Sada/CropGaurd) + + ``` + git clone --recurse-submodules https://github.com/07Sada/CropGaurd + ``` +- The total project is divided into 4 repositories: one parent repository and 3 child repositories.
The child repositories are dedicated to specific functionalities, namely [[crop recommendations](https://github.com/07Sada/crop-recommendation)], [[fertilizer recommendations](https://github.com/07Sada/Fertilizer-Recommendation)], and [[image classification](https://github.com/07Sada/plant-diseases-classifier)]. +- The parent and child repositories are connected using Git submodules. This approach is taken to keep each recommendation system separate, as they contain their end-to-end pipelines – from data ingestion to model training and deploying the best models for inference. +- This modular structure allows us to maintain clean and organized code while efficiently managing updates and changes to each submodule. +- The data ingestion pipeline is flexible, as it is integrated with a MongoDB database. You can set up a scheduler to periodically update the training data. After new data is ingested, the models are trained and evaluated against the existing models. The best model is then pushed for inference, all of which is seamlessly automated through the pipeline, reducing the potential for errors. +- To get started, navigate to the parent repository and install the required dependencies. +- Explore each child repository for more specific details on their functionalities and pipelines. +- Launch the web application by running command in terminal. + + ``` + python app.py + ``` +- Start exploring the features and making use of the insightful recommendations provided. + + +## Further Improvements + +These potential improvements are not only achievable but hold the promise of elevating the application's impact and utility: + +- ***Integration of Govt Policies:*** Imagine having the latest government policies and farmer-centric updates right at your fingertips. Our vision includes seamlessly integrating these critical updates, enabling you to stay informed and navigate regulatory changes with ease. 
+ +- ***Language Translation:*** Empowering users globally is within our reach. We envision breaking language barriers by adding translation capabilities. This means you can explore our insights and recommendations in your preferred language, ensuring accessibility for all. + +- ***Weather Information:*** Harnessing real-time weather data can revolutionize your decision-making. Picture accessing accurate weather information directly within the application, allowing you to adapt and strategize based on changing conditions. + +- ***Enhanced Recommendations with More Data:*** Our recommendation systems already provide valuable guidance, but we're not stopping there. By expanding our dataset, we're poised to fine-tune these systems to deliver recommendations that are even more personalized and effective. + +- ***Market Trends Analysis:*** Envision anticipating market trends and price fluctuations effortlessly. With our potential addition of market trend analysis, you can gain insights that empower you to make informed decisions about your produce's pricing and distribution. + +- ***Community Forums:*** We foresee a thriving community within the application—a space where knowledge is freely exchanged. Imagine being part of a network of farmers, sharing insights, experiences, and innovative approaches to agriculture. + +- ***Automated Data Updates:*** Our dedication to keeping our models up-to-date is unwavering. The potential implementation of automated data updates ensures that you're always working with the latest insights and recommendations. 
+ + diff --git a/__pycache__/app.cpython-38.pyc b/__pycache__/app.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fadf160638e6f8a88b2d33cc03df9cd278cd9593 Binary files /dev/null and b/__pycache__/app.cpython-38.pyc differ diff --git a/__pycache__/artifacts.cpython-311.pyc b/__pycache__/artifacts.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ea89af04923f14a725947b152306fc63d2850652 Binary files /dev/null and b/__pycache__/artifacts.cpython-311.pyc differ diff --git a/__pycache__/artifacts.cpython-38.pyc b/__pycache__/artifacts.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2861a2c6389c00756e175df1e9746f19d6503d2d Binary files /dev/null and b/__pycache__/artifacts.cpython-38.pyc differ diff --git a/__pycache__/config.cpython-311.pyc b/__pycache__/config.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08232fd791414b30609b455a1fb341ddc9198c8c Binary files /dev/null and b/__pycache__/config.cpython-311.pyc differ diff --git a/__pycache__/config.cpython-38.pyc b/__pycache__/config.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40e8b146634a716e0ef23ff682594f552e623660 Binary files /dev/null and b/__pycache__/config.cpython-38.pyc differ diff --git a/__pycache__/utils.cpython-311.pyc b/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..37afedf4150ff850b10f59953555bdc49419f649 Binary files /dev/null and b/__pycache__/utils.cpython-311.pyc differ diff --git a/__pycache__/utils.cpython-38.pyc b/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..416ab82801793b8471c017ff744faa32dc770b3c Binary files /dev/null and b/__pycache__/utils.cpython-38.pyc differ diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..852bc096574b47f75d5411a52de211b5cce366ed --- 
@app.route("/crop_recommendation_output", methods=['GET', 'POST'])
def crop_recommendation_output():
    """Predict a crop from the submitted form and render the result page.

    Reads seven numeric form fields, runs them through the crop
    pipeline encoder and model, decodes the predicted label, and fetches the
    matching image and description from MongoDB.

    Returns:
        The rendered ``crop_recommendation_output.html`` page, or a JSON
        error with HTTP status 400 when a field is missing or not numeric.
    """
    # Feature order expected by the encoder/model.
    field_order = (
        "nitrogen", "phosphorous", "potassium",
        "temperature", "humidity", "ph", "rain_fall",
    )

    try:
        # Parse as float: values such as a pH of "6.5" are valid input and
        # previously crashed the integer conversion.
        input_list = [float(request.form.get(name)) for name in field_order]
    except (TypeError, ValueError):
        # TypeError: field missing (None); ValueError: not a number.
        return jsonify({"error": "All crop recommendation fields must be numeric."}), 400

    input_array = np.array(input_list).reshape(-1, 7)

    transformed_data = crop_pipeline_encoder.transform(input_array)
    model_prediction = crop_model.predict(transformed_data).astype(int)

    label = crop_label_encoder.inverse_transform(model_prediction)

    # retrieving the image from the mongodb database
    image_data = retrieve_image_by_name_from_mongodb(
        database_name=os.getenv("CROP_DB_NAME"),
        collection_name=os.getenv("CROP_IMAGE_COLLECTION_NAME"),
        file_name=str(label[0]),
    )

    # encoding the byte data received from mongodb so it can be embedded in HTML
    image_data_base64 = base64.b64encode(image_data).decode('utf-8')

    # retrieving text data from mongodb
    crop_details = retrieve_data(
        database_name=os.getenv("CROP_DB_NAME"),
        collection_name=os.getenv("CROP_INFO_COLLECTION_NAME"),
        search_query=label[0],
    )

    return render_template(
        'crop_recommendation_output.html',
        image_data_base64=image_data_base64,
        input_file_name=label[0],
        crop_details=crop_details,
    )
image_data_base64=image_data_base64, label= label[0], fertilizer_details=fertilizer_details) + + +@app.route('/image_classification', methods=['GET', 'POST']) +def image_classification(): + return render_template('image_classification_input.html') + +@app.route('/image_classification_output', methods=['GET', 'POST']) +def image_classification_output(): + file = request.files['image_file'] + new_filename = "plant_image.JPG" + file.save(os.path.join(app.config['UPLOAD_FOLDER'], new_filename)) + file_path = os.path.join(app.config['UPLOAD_FOLDER'], new_filename) + + # infercing the with the uploaded image + results = plant_diseases_classifier_model(file_path) + + #fetching all the labels + names_dict = results[0].names + + # fetching the probalility of each class + probs = results[0].probs.data.tolist() + + # selecting class with maximum probability + model_prediction= names_dict[np.argmax(probs)] + + diseases_details = retrieve_data(database_name=os.getenv("DISEASE_DB_NAME"), + collection_name=os.getenv("DISEASE_INFO_COLLECTION_NAME"), + search_query=model_prediction) + + return render_template("image_classification_output.html", model_prediction=model_prediction, diseases_details=diseases_details) + + +@app.route('/market_price') +def market_price(): + return render_template("market_price_input.html") + +@app.route('/market_price_output', methods=['POST']) +def market_price_output(): + # input field name is 'selected_state' + user_input = request.form.get('selected_state') + api_key = os.getenv("COMMODITY_PRICE_API_KEY") + + # Make a request to the API with the user input + api_url = f'https://api.data.gov.in/resource/9ef84268-d588-465a-a308-a864a43d0070?api-key={api_key}&format=json&filters%5Bstate%5D={user_input}' + response = requests.get(api_url) + + if response.status_code == 200: + data = response.json() + data = data['records'] + # return render_template('market_price_output.html', data=data) + if len(data) > 0: + # Return the JSON data as a response + return 
render_template('market_price_output.html', data=data) + else: + return render_template("market_price_no_data.html") + else: + return jsonify({'error': 'Unable to fetch data from the API'}), 400 + +if __name__ == "__main__": + app.run(debug=True, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/artifacts.py b/artifacts.py new file mode 100644 index 0000000000000000000000000000000000000000..7480c459c36efeb19b773d5ad67f1087947d7202 --- /dev/null +++ b/artifacts.py @@ -0,0 +1,31 @@ +import os + +MODEL_NAME = "model.pkl" +TARGET_ENCODER_OBJECT_NAME = "target_encoder.pkl" +TRANSFORMER_OJBCET_NAME = "transformer.pkl" + +crop_recommendation_artifacts_path = "./crop-recommendation/saved_models" +fertilizer_recommendation_artifacts_path = "./Fertilizer-Recommendation/saved_models" + +plant_diseases_classifier_model_path = "./plant-diseases-classifier/custom_model_weights/best.pt" + + +## crop recommendation artifacts +latest_crop_recommendation_artifacts = max(os.listdir(crop_recommendation_artifacts_path)) #0, 1, 2 + +latest_crop_recommendation_artifacts_path = os.path.join(crop_recommendation_artifacts_path, latest_crop_recommendation_artifacts) + +crop_model_path = os.path.join(latest_crop_recommendation_artifacts_path, 'model', MODEL_NAME) +crop_transformer_path = os.path.join(latest_crop_recommendation_artifacts_path,'transformer', TRANSFORMER_OJBCET_NAME) +crop_target_encoder_path = os.path.join(latest_crop_recommendation_artifacts_path, 'target_encoder', TARGET_ENCODER_OBJECT_NAME) + + +## fertilizer recommendation artifacts +latest_fertilizer_recommendation_artifacts = max(os.listdir(fertilizer_recommendation_artifacts_path)) #0, 1, 2 + +latest_fertilizer_recommendation_artifacts_path = os.path.join(fertilizer_recommendation_artifacts_path, latest_fertilizer_recommendation_artifacts) + +fertilizer_model_path = os.path.join(latest_fertilizer_recommendation_artifacts_path, 'model', MODEL_NAME) +fertilizer_transformer_path = 
os.path.join(latest_fertilizer_recommendation_artifacts_path,'transformer', TRANSFORMER_OJBCET_NAME) +fertilizer_target_encoder_path = os.path.join(latest_fertilizer_recommendation_artifacts_path, 'target_encoder', TARGET_ENCODER_OBJECT_NAME) + diff --git a/config.py b/config.py new file mode 100644 index 0000000000000000000000000000000000000000..a28eff0d2b8208120512a026c1a52e087d604500 --- /dev/null +++ b/config.py @@ -0,0 +1,16 @@ +from artifacts import crop_model_path, crop_transformer_path, crop_target_encoder_path +from artifacts import fertilizer_model_path, fertilizer_transformer_path, fertilizer_target_encoder_path +from artifacts import plant_diseases_classifier_model_path + +from utils import load_model_and_encoders +from ultralytics import YOLO + +crop_model, crop_pipeline_encoder, crop_label_encoder = load_model_and_encoders(model_path=crop_model_path, + transformer_path=crop_transformer_path, + target_encoder_path=crop_target_encoder_path) + +fertilizer_model, fertilizer_pipeline_encoder, fertilizer_label_encoder = load_model_and_encoders(model_path=fertilizer_model_path, + transformer_path=fertilizer_transformer_path, + target_encoder_path=fertilizer_target_encoder_path) + +plant_diseases_classifier_model = YOLO(plant_diseases_classifier_model_path) diff --git a/crop-recommendation/.gitignore b/crop-recommendation/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..aa707d116e3cb975cf841540d97f726e397da4f2 --- /dev/null +++ b/crop-recommendation/.gitignore @@ -0,0 +1,168 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject 
date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +data_dump.py +demo.ipynb +kaggle.json +crop-recommendation-dataset +catboost_info +temp.py +artifact \ No newline at end of file diff --git a/crop-recommendation/.vscode/extensions.json b/crop-recommendation/.vscode/extensions.json new file mode 100644 index 0000000000000000000000000000000000000000..f2443f916752e0f49a4510f1a3fc4761e34b4f45 --- /dev/null +++ b/crop-recommendation/.vscode/extensions.json @@ -0,0 +1,10 @@ +{ + "recommendations": [ + "mongodb.mongodb-vscode", + "ms-python.python", + "ms-toolsai.jupyter", + "ms-toolsai.jupyter-keymap", + "ms-toolsai.jupyter-renderers", + "formulahendry.code-runner" + ] +} diff --git a/crop-recommendation/.vscode/settings.json b/crop-recommendation/.vscode/settings.json new file mode 100644 index 0000000000000000000000000000000000000000..8a2533953bf43d9af48928851a0f5535631fd8ff --- /dev/null +++ b/crop-recommendation/.vscode/settings.json @@ -0,0 +1,8 @@ +{ + "workbench.colorTheme": "Default Dark+", + "workbench.preferredDarkColorTheme": "Default Dark+", + "task.allowAutomaticTasks": "on", + 
"workbench.editorAssociations": { + "*.md": "vscode.markdown.preview.editor" + } +} diff --git a/crop-recommendation/.vscode/tasks.json b/crop-recommendation/.vscode/tasks.json new file mode 100644 index 0000000000000000000000000000000000000000..df60023539b2e7a04dd3b9cf841b61cc8b851cc6 --- /dev/null +++ b/crop-recommendation/.vscode/tasks.json @@ -0,0 +1,15 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Installing extensions and dependencies...", + "type": "shell", + "command": "code-server --install-extension mongodb.mongodb-vscode --install-extension ms-python.python --install-extension formulahendry.code-runner && pip install -r requirements.txt", + "presentation": { + "reveal": "always", + "panel": "new" + }, + "runOptions": { "runOn": "folderOpen" } + } + ] +} diff --git a/crop-recommendation/LICENSE b/crop-recommendation/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..e121f2e91d7901e575e89d72fbae0276c0409c4e --- /dev/null +++ b/crop-recommendation/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Sadashiv Nandanikar + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/crop-recommendation/README.md b/crop-recommendation/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7e26f9287ab5d2a0c6b15c04731c5d976acf951f --- /dev/null +++ b/crop-recommendation/README.md @@ -0,0 +1,50 @@ +# Crop Recommendation + +#### Harnessing the capabilities of machine learning models, this project analyzes specific parameters to suggest the most suitable crops, optimizing yields and efficiency. + +## Demo +### Input Interface +Image 1 + +### Output Interface +Image 1 + +## Data Source +This dataset contains information about the soil and environmental conditions that are ideal for growing different crops. The dataset includes the following columns: + +- `N`: The ratio of nitrogen content in the soil. +- `P`: The ratio of phosphorus content in the soil. +- `K`: The ratio of potassium content in the soil. +- `Temperature`: The temperature in degrees Celsius. +- `Humidity`: The relative humidity in percent. +- `pH`: The pH value of the soil. +- `Rainfall`: The rainfall in millimeters. + +[Link](https://www.kaggle.com/datasets/atharvaingle/crop-recommendation-dataset) for the dataset + +
+ Supported crops + + +- Apple +- Blueberry +- Cherry +- Corn +- Grape +- Pepper +- Orange +- Peach +- Potato +- Soybean +- Strawberry +- Tomato +- Squash +- Raspberry +
+ +## Project Details +This repository is a submodule of [CropGaurd](https://github.com/07Sada/CropGaurd.git) + +## Project PipeLine Stages +![Project PipeLine Stages](https://user-images.githubusercontent.com/112761379/225940480-2a7381b2-6abd-4c1c-8287-0fd49099be8c.jpg) +
diff --git a/crop-recommendation/data_download.py b/crop-recommendation/data_download.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d119f4492adbeef391e3cc0c24334a7e3d06845
--- /dev/null
+++ b/crop-recommendation/data_download.py
@@ -0,0 +1,46 @@
+"""Download the crop recommendation dataset using Kaggle credentials from .env."""
+import opendatasets as od
+import os
+import json
+from dotenv import load_dotenv
+
+# Load variables from .env file
+load_dotenv()
+
+DATASET_URL = "https://www.kaggle.com/datasets/atharvaingle/crop-recommendation-dataset"
+
+
+def _kaggle_json_path():
+    """Return the path of kaggle.json in the current working directory."""
+    return os.path.join(os.getcwd(), 'kaggle.json')
+
+
+def create_kaggle_json_file():
+    """Write the `username` and `key` environment values to kaggle.json."""
+    # Fetch the username and API key from the .env file
+    username = os.getenv('username')
+    key = os.getenv('key')
+
+    kaggle_credentials = {
+        "username": username,
+        "key": key
+    }
+
+    # Write the dictionary to the kaggle.json file
+    with open(_kaggle_json_path(), 'w') as file:
+        json.dump(kaggle_credentials, file)
+
+
+def remove_kaggle_json_file():
+    """Delete the kaggle.json file from the current working directory."""
+    os.remove(_kaggle_json_path())
+
+
+create_kaggle_json_file()
+
+try:
+    od.download(DATASET_URL)
+finally:
+    # Remove the kaggle.json file even when the download fails, so the
+    # credentials are not left behind on disk.
+    remove_kaggle_json_file()
\ No newline at end of file
diff --git a/crop-recommendation/main.py b/crop-recommendation/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd1cae98b9797ac75b945bd5fa30517b44b16ad7
--- /dev/null
+++ b/crop-recommendation/main.py
@@ -0,0 +1,13 @@
+"""Entry point that runs the training pipeline."""
+import sys
+
+from src.pipeline.training_pipeline import start_training_pipeline
+
+if __name__ == "__main__":
+    try:
+        start_training_pipeline()
+    except Exception as e:
+        # Report the failure and exit non-zero so a shell or CI job can
+        # tell that training did not complete.
+        print(e)
+        sys.exit(1)
\ No newline at end of file
diff --git a/crop-recommendation/notebooks/crop-recommendation-notebook.ipynb b/crop-recommendation/notebooks/crop-recommendation-notebook.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..165fbd5875e756cdb9a5697ca5307872b8126d63 --- /dev/null +++ b/crop-recommendation/notebooks/crop-recommendation-notebook.ipynb @@ -0,0 +1,743 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# importing the dependancies \n", + "import pandas as pd \n", + "import numpy as np \n", + "import matplotlib.pyplot as plt \n", + "import seaborn as sns\n", + "\n", + "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from xgboost import XGBClassifier\n", + "from catboost import CatBoostClassifier\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dataset Summary\n", + "\n", + "This dataset was build by augmenting datasets of rainfall, climate and fertilizer data available for India.\n", + "\n", + "- `N` - ratio of Nitrogen content in soil\n", + "- `P` - ratio of Phosphorous content in soil\n", + "- `K` - ratio of Potassium content in soil\n", + "- `temperature` - temperature in degree Celsius\n", + "- `humidity` - relative humidity in %\n", + "- `ph` - ph value of the soil\n", + "- `rainfall` - rainfall in mm" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NPKtemperaturehumidityphrainfalllabel
090424320.87974482.0027446.502985202.935536rice
185584121.77046280.3196447.038096226.655537rice
260554423.00445982.3207637.840207263.964248rice
374354026.49109680.1583636.980401242.864034rice
478424220.13017581.6048737.628473262.717340rice
\n", + "
" + ], + "text/plain": [ + " N P K temperature humidity ph rainfall label\n", + "0 90 42 43 20.879744 82.002744 6.502985 202.935536 rice\n", + "1 85 58 41 21.770462 80.319644 7.038096 226.655537 rice\n", + "2 60 55 44 23.004459 82.320763 7.840207 263.964248 rice\n", + "3 74 35 40 26.491096 80.158363 6.980401 242.864034 rice\n", + "4 78 42 42 20.130175 81.604873 7.628473 262.717340 rice" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "FILE_PATH = \"/config/workspace/crop-recommendation-dataset/Crop_recommendation.csv\"\n", + "\n", + "df = pd.read_csv(FILE_PATH)\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape of the dataset: (2200, 8)\n" + ] + } + ], + "source": [ + "print(f\"Shape of the dataset: {df.shape}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "N 0\n", + "P 0\n", + "K 0\n", + "temperature 0\n", + "humidity 0\n", + "ph 0\n", + "rainfall 0\n", + "label 0\n", + "dtype: int64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# checking for the null values in the dataset \n", + "df.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NPKtemperaturehumidityphrainfall
count2200.0000002200.0000002200.0000002200.0000002200.0000002200.0000002200.000000
mean50.55181853.36272748.14909125.61624471.4817796.469480103.463655
std36.91733432.98588350.6479315.06374922.2638120.77393854.958389
min0.0000005.0000005.0000008.82567514.2580403.50475220.211267
25%21.00000028.00000020.00000022.76937560.2619535.97169364.551686
50%37.00000051.00000032.00000025.59869380.4731466.42504594.867624
75%84.25000068.00000049.00000028.56165489.9487716.923643124.267508
max140.000000145.000000205.00000043.67549399.9818769.935091298.560117
\n", + "
" + ], + "text/plain": [ + " N P K temperature humidity \\\n", + "count 2200.000000 2200.000000 2200.000000 2200.000000 2200.000000 \n", + "mean 50.551818 53.362727 48.149091 25.616244 71.481779 \n", + "std 36.917334 32.985883 50.647931 5.063749 22.263812 \n", + "min 0.000000 5.000000 5.000000 8.825675 14.258040 \n", + "25% 21.000000 28.000000 20.000000 22.769375 60.261953 \n", + "50% 37.000000 51.000000 32.000000 25.598693 80.473146 \n", + "75% 84.250000 68.000000 49.000000 28.561654 89.948771 \n", + "max 140.000000 145.000000 205.000000 43.675493 99.981876 \n", + "\n", + " ph rainfall \n", + "count 2200.000000 2200.000000 \n", + "mean 6.469480 103.463655 \n", + "std 0.773938 54.958389 \n", + "min 3.504752 20.211267 \n", + "25% 5.971693 64.551686 \n", + "50% 6.425045 94.867624 \n", + "75% 6.923643 124.267508 \n", + "max 9.935091 298.560117 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.set(style = \"whitegrid\")\n", + "plt.figure(figsize = (15,7))\n", + "\n", + "ax = sns.countplot(data = df, x= 'label')\n", + "\n", + "ax.set(xlabel='crop name')\n", + "plt.xticks(rotation = 45)\n", + "\n", + "ax.set(ylabel = 'data points')\n", + "\n", + "plt.title(\"Dataset Distribution\", fontsize = 20)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "corr_matrix = df._get_numeric_data().corr()\n", + "\n", + "plt.figure(figsize=(7, 5))\n", + "\n", + "sns.heatmap(corr_matrix, annot=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape of the train data: (1760, 7)\n", + "Shape of the test data: (440, 7)\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "X = df.drop(columns=['label'])\n", + "y = df['label']\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "print(f\"Shape of the train data: {X_train.shape}\")\n", + "print(f\"Shape of the test data: {X_test.shape}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "\n", + "le = LabelEncoder()\n", + "\n", + "y_train_transformed = le.fit_transform(y_train)\n", + "y_test_transformed = le.transform(y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score\n", + "\n", + "def evaluate_clf(true, predicted):\n", + " '''\n", + " This function takes in true values and predicted values\n", + " Returns: Accuracy, F1-Score, Precision, Recall, Roc-auc Score\n", + " '''\n", + " acc = accuracy_score(true, predicted)\n", + " f1 = f1_score(true, predicted, average='weighted')\n", + " precision = precision_score(true, predicted, average='weighted')\n", + " recall = recall_score(true, predicted, average='weighted')\n", + " \n", + " return acc, f1, precision, recall" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "# create a 
function which can evaluate models and returns a report \n", + "def evaluate_model(X_train, X_test, y_train, y_test, models):\n", + " '''\n", + " This function takes X_train, X_test, y_train, y_test and models dictionary as input\n", + " Iterate through the given model directory and evaluate metrics\n", + "\n", + " Returns:\n", + " DataFrame which contains report of all models metrics \n", + " '''\n", + "\n", + " model_list = []\n", + " metric_list = []\n", + "\n", + " for i in range(len(list(models))):\n", + " model = list(models.values())[i]\n", + " model.fit(X_train, y_train)\n", + "\n", + " # Make predictions\n", + " y_train_pred = model.predict(X_train)\n", + " y_test_pred = model.predict(X_test)\n", + "\n", + " # Training set performances\n", + " model_train_accuracy, model_train_f1, model_train_precision, \\\n", + " model_train_recall = evaluate_clf(y_train, y_train_pred)\n", + "\n", + " # Test set peformances \n", + " model_test_accuracy, model_test_f1, model_test_precision, \\\n", + " model_test_recall = evaluate_clf(y_test, y_test_pred)\n", + "\n", + " print(list(models.keys())[i])\n", + " model_list.append(list(models.keys())[i])\n", + "\n", + " result_dict ={'model_name':list(models.keys())[i], \n", + " \"train_accuracy\": model_train_accuracy, \"test_accuracy\": model_test_accuracy,\n", + " \"train_precision\": model_train_precision, \"test_precision\": model_test_precision,\n", + " 'train_recall': model_train_recall, \"test_recall\":model_test_recall,\n", + " \"train_f1_score\": model_train_f1, \"test_f1_score\": model_test_f1}\n", + "\n", + " metric_list.append(result_dict)\n", + "\n", + " \n", + " return metric_list\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "# Model Dictionary\n", + "models = {\n", + " \"Random Forest\": RandomForestClassifier(),\n", + " \"Decision Tree\": DecisionTreeClassifier(),\n", + " \"Gradient Boosting\": GradientBoostingClassifier(),\n", + " 
\"K-Neighbors Classifier\": KNeighborsClassifier(),\n", + " \"XGBClassifier\": XGBClassifier(), \n", + " \"CatBoosting Classifier\": CatBoostClassifier(verbose=False),\n", + " \"AdaBoost Classifier\": AdaBoostClassifier()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "resultant_metrics = evaluate_model(X_train, X_test, y_train_transformed, y_test_transformed, models)\n", + "\n", + "resultant_metrics_df = pd.DataFrame(data=resultant_metrics)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
model_nametrain_accuracytest_accuracytrain_precisiontest_precisiontrain_recalltest_recalltrain_f1_scoretest_f1_score
0Random Forest1.0000000.9931821.0000000.9937351.0000000.9931821.0000000.993175
4XGBClassifier1.0000000.9909091.0000000.9914471.0000000.9909091.0000000.990893
5CatBoosting Classifier1.0000000.9886361.0000000.9898081.0000000.9886361.0000000.988698
2Gradient Boosting1.0000000.9818181.0000000.9842711.0000000.9818181.0000000.981851
1Decision Tree1.0000000.9818181.0000000.9823311.0000000.9818181.0000000.981809
3K-Neighbors Classifier0.9897730.9704550.9901060.9739760.9897730.9704550.9897980.970311
6AdaBoost Classifier0.1920450.1409090.0998620.0712470.1920450.1409090.1182040.085220
\n", + "
" + ], + "text/plain": [ + " model_name train_accuracy test_accuracy train_precision \\\n", + "0 Random Forest 1.000000 0.993182 1.000000 \n", + "4 XGBClassifier 1.000000 0.990909 1.000000 \n", + "5 CatBoosting Classifier 1.000000 0.988636 1.000000 \n", + "2 Gradient Boosting 1.000000 0.981818 1.000000 \n", + "1 Decision Tree 1.000000 0.981818 1.000000 \n", + "3 K-Neighbors Classifier 0.989773 0.970455 0.990106 \n", + "6 AdaBoost Classifier 0.192045 0.140909 0.099862 \n", + "\n", + " test_precision train_recall test_recall train_f1_score test_f1_score \n", + "0 0.993735 1.000000 0.993182 1.000000 0.993175 \n", + "4 0.991447 1.000000 0.990909 1.000000 0.990893 \n", + "5 0.989808 1.000000 0.988636 1.000000 0.988698 \n", + "2 0.984271 1.000000 0.981818 1.000000 0.981851 \n", + "1 0.982331 1.000000 0.981818 1.000000 0.981809 \n", + "3 0.973976 0.989773 0.970455 0.989798 0.970311 \n", + "6 0.071247 0.192045 0.140909 0.118204 0.085220 " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "resultant_metrics_df = resultant_metrics_df.sort_values(by='test_f1_score', ascending=False)\n", + "resultant_metrics_df" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.10 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/crop-recommendation/requirements.txt b/crop-recommendation/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..6b9911f4ad971d1150692db33a0f6f0464c48bce --- /dev/null +++ 
b/crop-recommendation/requirements.txt @@ -0,0 +1,11 @@ +pymongo +pandas +numpy +matplotlib +seaborn +scikit-learn +opendatasets +python-dotenv +ipykernel +PyYAML +dill \ No newline at end of file diff --git a/crop-recommendation/saved_models/0/model/model.pkl b/crop-recommendation/saved_models/0/model/model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..db6c0aed482d69945dd8da6cc692288bff854f90 --- /dev/null +++ b/crop-recommendation/saved_models/0/model/model.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61347ed5e6bbb2060eddc5a515c43e9d61aae5f6f1c7eaecb1f52b64f2df89a5 +size 3676666 diff --git a/crop-recommendation/saved_models/0/target_encoder/target_encoder.pkl b/crop-recommendation/saved_models/0/target_encoder/target_encoder.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d3760e63c70da6a27e954103b18949902f05f124 --- /dev/null +++ b/crop-recommendation/saved_models/0/target_encoder/target_encoder.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4d38ad63d1d6a7008472cfc1a262ce0c847df792eb95e716067c847ea521a30 +size 499 diff --git a/crop-recommendation/saved_models/0/transformer/transformer.pkl b/crop-recommendation/saved_models/0/transformer/transformer.pkl new file mode 100644 index 0000000000000000000000000000000000000000..916ef7d052dc9f0db43cf633f6327c2d104a5b45 --- /dev/null +++ b/crop-recommendation/saved_models/0/transformer/transformer.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8ca70d7ada1f6e8d0cf1594a6963de0ccf973b16d140c091070eca8568b3108 +size 901 diff --git a/crop-recommendation/src/__init__.py b/crop-recommendation/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/crop-recommendation/src/components/__init__.py b/crop-recommendation/src/components/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/crop-recommendation/src/components/data_ingestion.py b/crop-recommendation/src/components/data_ingestion.py new file mode 100644 index 0000000000000000000000000000000000000000..1c575c2b8c99e8a372ee1df8cbbe4b4ebfa03a1d --- /dev/null +++ b/crop-recommendation/src/components/data_ingestion.py @@ -0,0 +1,73 @@ +from src.entity import config_entity +from src.entity import artifact_entity +from src.exception import CropException +from src.logger import logging +from src import utils + +from sklearn.model_selection import train_test_split +import numpy as np +import pandas as pd +import sys +import os + + +class DataIngestion: + def __init__(self, data_ingestion_config: config_entity.DataIngestionConfig): + try: + logging.info(f"{'>>'*20} Data Ingestion {'<<'*20}") + self.data_ingestion_config = data_ingestion_config + except Exception as e: + raise CropException(e, sys) + + def initiate_data_ingestion(self) -> artifact_entity.DataIngestionArtifact: + try: + logging.info("Exporting collection data as pandas dataframe") + + df: pd.DataFrame = utils.get_collection_as_dataframe( + database_name=self.data_ingestion_config.database_name, + collection_name=self.data_ingestion_config.collection_name, + ) + + logging.info("Saving data in feature store") + + feature_store_dir = os.path.dirname(self.data_ingestion_config.feature_store_file_path) + os.makedirs(feature_store_dir, exist_ok=True) + + logging.info("Saving dataframe into feature store") + df.to_csv( + path_or_buf=self.data_ingestion_config.feature_store_file_path, + index=False, + header=True, + ) + + logging.info("split dataset into train and test test") + train_df, test_df = train_test_split( + df, test_size=self.data_ingestion_config.test_size, random_state=42 + ) + + logging.info("create dataset directory folder if not available") + dataset_dir = os.path.dirname(self.data_ingestion_config.train_file_path) + 
os.makedirs(dataset_dir, exist_ok=True) + + logging.info("Save df to feature store folder") + train_df.to_csv( + path_or_buf=self.data_ingestion_config.train_file_path, + index=False, + header=True, + ) + test_df.to_csv( + path_or_buf=self.data_ingestion_config.test_file_path, + index=False, + header=True, + ) + + data_ingestion_artifact = artifact_entity.DataIngestionArtifact( + feature_store_file_path=self.data_ingestion_config.feature_store_file_path, + train_file_path=self.data_ingestion_config.train_file_path, + test_file_path=self.data_ingestion_config.test_file_path, + ) + logging.info(f"Data ingestion artifact: {data_ingestion_artifact}") + return data_ingestion_artifact + + except Exception as e: + raise CropException(error_message=e, error_detail=sys) diff --git a/crop-recommendation/src/components/data_trasformation.py b/crop-recommendation/src/components/data_trasformation.py new file mode 100644 index 0000000000000000000000000000000000000000..60b2f3d8bb9e0fc3ef3acb666de5b147775f868a --- /dev/null +++ b/crop-recommendation/src/components/data_trasformation.py @@ -0,0 +1,113 @@ +from src.entity import artifact_entity +from src.entity import config_entity +from src.logger import logging +from src.exception import CropException +from src import utils +from src.config import TARGET_COLUMN + +from typing import Optional +import os +import sys + +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import LabelEncoder +from sklearn.preprocessing import StandardScaler +import pandas as pd +import numpy as np + + +class DataTransformation: + def __init__( + self, + data_transformation_config: config_entity.DataTransformationConfig, + data_ingestion_artifact: artifact_entity.DataIngestionArtifact, + ): + try: + logging.info(f"{'>'*20} Data Transformation Initiated {'<'*20}") + self.data_transformation_config = data_transformation_config + self.data_ingestion_artifact = data_ingestion_artifact + + except Exception as e: + raise CropException(e, sys) 
+ + @classmethod + def get_data_tranformer_object(cls) -> Pipeline: + try: + standard_scaler = StandardScaler() + + pipeline = Pipeline(steps=[("StandardScaler", standard_scaler)]) + + return pipeline + + except Exception as e: + raise CropException(e, sys) + + def initiate_data_transformation( + self, + ) -> artifact_entity.DataTransformationArtifact: + try: + # reading training and testing file + train_df = pd.read_csv(self.data_ingestion_artifact.train_file_path) + test_df = pd.read_csv(self.data_ingestion_artifact.test_file_path) + + # selecting input features for train and test dataframe + input_feature_train_df = train_df.drop(TARGET_COLUMN, axis=1) + input_feature_test_df = test_df.drop(TARGET_COLUMN, axis=1) + + # selecting target feature for train and test dataframe + target_feature_train_df = train_df[TARGET_COLUMN] + target_feature_test_df = test_df[TARGET_COLUMN] + + label_encoder = LabelEncoder() + label_encoder.fit(target_feature_train_df) + + # transformation on target column + target_feature_train_arr = label_encoder.transform(target_feature_train_df) + target_feature_test_arr = label_encoder.transform(target_feature_test_df) + + # transforming input features + transformation_pipeline = DataTransformation.get_data_tranformer_object() + transformation_pipeline.fit(input_feature_train_df) + + input_feature_train_arr = transformation_pipeline.transform( + input_feature_train_df + ) + input_feature_test_arr = transformation_pipeline.transform( + input_feature_test_df + ) + + train_arr = np.c_[input_feature_train_arr, target_feature_train_arr] + test_arr = np.c_[input_feature_test_arr, target_feature_test_arr] + + # save the numpy array + utils.save_object( + file_path=self.data_transformation_config.transformed_train_path, + obj=train_arr, + ) + utils.save_object( + file_path=self.data_transformation_config.transformed_test_path, + obj=test_arr, + ) + + utils.save_object( + file_path=self.data_transformation_config.transform_object_path, + 
obj=transformation_pipeline, + ) + + utils.save_object( + file_path=self.data_transformation_config.target_encoder_path, + obj=label_encoder, + ) + + data_transformation_artifact = artifact_entity.DataTransformationArtifact( + transform_object_path=self.data_transformation_config.transform_object_path, + transformed_train_path=self.data_transformation_config.transformed_train_path, + transformed_test_path=self.data_transformation_config.transformed_test_path, + target_encoder_path=self.data_transformation_config.target_encoder_path, + ) + + logging.info(f"Data transformation object : {data_transformation_artifact}") + return data_transformation_artifact + + except Exception as e: + raise CropException(e, sys) diff --git a/crop-recommendation/src/components/data_validation.py b/crop-recommendation/src/components/data_validation.py new file mode 100644 index 0000000000000000000000000000000000000000..f0a99f75e0d5aa476ccbbdf2dd52f978942b8005 --- /dev/null +++ b/crop-recommendation/src/components/data_validation.py @@ -0,0 +1,159 @@ +from src.entity import artifact_entity +from src.entity import config_entity +from src.logger import logging +from src.exception import CropException +from src.config import TARGET_COLUMN +from src import utils + +from typing import Optional +from scipy.stats import ks_2samp +import pandas as pd +import numpy as np +import sys +import os + + +class DataValidation: + def __init__( + self, + data_validation_config: config_entity.DataValidationConfig, + data_ingestion_artifact: artifact_entity.DataIngestionArtifact, + ): + try: + logging.info(f"{'>'*20} Data Validation iniated {'<'*20}") + self.data_validation_config = data_validation_config + self.data_ingestion_artifact = data_ingestion_artifact + self.validation_error = dict() + except Exception as e: + raise CropException(e, sys) + + def is_required_columns_exists( + self, base_df: pd.DataFrame, current_df: pd.DataFrame, report_key_name: str + ) -> bool: + try: + base_columns = 
base_df.columns + current_columns = current_df.columns + + missing_columns = [] + for base_column in base_columns: + if base_column not in current_columns: + logging.info(f"Column: {base_column} is not available") + missing_columns.append(base_column) + + if len(missing_columns) > 0: + self.validation_error[report_key_name] = missing_columns + return False + + return True + + except Exception as e: + raise CropException(e, sys) + + def data_drift( + self, base_df: pd.DataFrame, current_df: pd.DataFrame, report_key_name: str + ): + try: + drift_report = dict() + + base_columns = base_df.columns + current_columns = current_df.columns + + for base_column in base_columns: + base_data, current_data = base_df[base_column], current_df[base_column] + + # Null hypothesis is that both columns data drawn from same distribution + + logging.info( + f"Hypothesis {base_column} : {base_data.dtype}, {current_data.dtype}" + ) + same_distribution = ks_2samp(base_data, current_data) + + if same_distribution.pvalue > 0.05: + # we are accepting the null hypothesis + drift_report[base_column] = { + "pvalue": float(same_distribution.pvalue), + "same_distribution": True, + } + + else: + drift_report[base_column] = { + "pvalue": float(same_distribution.pvalue), + "same_distribution": False, + } + + self.validation_error[report_key_name] = drift_report + + except Exception as e: + raise CropException(e, sys) + + def initiate_data_validation(self) -> artifact_entity.DataValidationArtifact: + try: + logging.info(f"Reading base dataframe") + base_df = pd.read_csv(self.data_validation_config.base_file_path) + + logging.info(f"Reading train dataframe") + train_df = pd.read_csv(self.data_ingestion_artifact.train_file_path) + + logging.info(f"Reading test dataframe") + test_df = pd.read_csv(self.data_ingestion_artifact.test_file_path) + + exclude_column = [TARGET_COLUMN] + base_df = utils.seperate_dependant_column( + df=base_df, exclude_column=exclude_column + ) + train_df = 
utils.seperate_dependant_column( + df=train_df, exclude_column=exclude_column + ) + test_df = utils.seperate_dependant_column( + df=test_df, exclude_column=exclude_column + ) + + logging.info(f"Is all required columns present in the train_df") + train_df_columns_status = self.is_required_columns_exists( + base_df=base_df, + current_df=train_df, + report_key_name="missing_columns_within_train_dataset", + ) + + test_df_columns_status = self.is_required_columns_exists( + base_df=base_df, + current_df=test_df, + report_key_name="missing_columns_within_test_dataset", + ) + + if train_df_columns_status: + logging.info( + f"As all column are available in train df hence detecting data drift" + ) + self.data_drift( + base_df=base_df, + current_df=train_df, + report_key_name="data_drift_within_train_dataset", + ) + + if test_df_columns_status: + logging.info( + f"As all column are available in test df hence detecting data drift" + ) + self.data_drift( + base_df=base_df, + current_df=test_df, + report_key_name="data_drift_within_test_dataset", + ) + + # writing the report + logging.info("Writing report in yaml format") + utils.write_yaml_file( + file_path=self.data_validation_config.report_file_path, + data=self.validation_error, + ) + + data_validation_artifact = artifact_entity.DataValidationArtifact( + report_file_path=self.data_validation_config.report_file_path + ) + logging.info(f"Data validation artifact: {data_validation_artifact}") + + return data_validation_artifact + + except Exception as e: + raise CropException(e, sys) diff --git a/crop-recommendation/src/components/model_evaluation.py b/crop-recommendation/src/components/model_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..880663d93e4e132a944df54e3caafa71b7536bf6 --- /dev/null +++ b/crop-recommendation/src/components/model_evaluation.py @@ -0,0 +1,123 @@ +from src.predictor import ModelResolver +from src.entity import config_entity +from src.entity import artifact_entity +from 
src.logger import logging +from src.exception import CropException +from src.config import TARGET_COLUMN +from src.utils import load_object + +from sklearn.metrics import f1_score +import pandas as pd +import numpy as np +import os +import sys + + +class ModelEvaluation: + def __init__( + self, + model_eval_config: config_entity.ModelEvaluationConfig, + data_ingesiton_artifact: artifact_entity.DataIngestionArtifact, + data_transformation_artifact: artifact_entity.DataTransformationArtifact, + model_trainer_artifact: artifact_entity.ModelTrainerArtifact, + ): + try: + logging.info(f"{'>'*20} Model Evaluation Initiated {'<'*20}") + self.model_eval_config = model_eval_config + self.data_ingesiton_artifact = data_ingesiton_artifact + self.data_transformation_artifact = data_transformation_artifact + self.model_trainer_artifact = model_trainer_artifact + self.model_resolver = ModelResolver() + + except Exception as e: + raise CropException(e, sys) + + def initiate_model_evaluation(self) -> artifact_entity.ModelEvaluationArtifact: + try: + logging.info( + f"If the saved model directory contains a model, we will compare which model is best trained: \ + the model from the saved model folder or the new model." 
+ ) + + latest_dir_path = self.model_resolver.get_latest_dir_path() + if latest_dir_path == None: + model_eval_artifact = artifact_entity.ModelEvaluationArtifact( + is_model_accepted=True, improved_accuracy=None + ) + logging.info(f"Model evaluation artifact: {model_eval_artifact}") + return model_eval_artifact + + # finding location of transformed model, and target encoder + logging.info(f"Finding location of transformer model and target encoder") + transformer_path = self.model_resolver.get_latest_transformer_path() + + model_path = self.model_resolver.get_latest_model_path() + + target_encoder_path = self.model_resolver.get_latest_target_encoder_path() + + logging.info( + f"Previous trained objects of transformer, model and target encoder" + ) + # previous trained objects + transformer = load_object(file_path=transformer_path) + model = load_object(file_path=model_path) + target_encoder = load_object(file_path=target_encoder_path) + + logging.info(f"Currently trained model objects") + # currently trained model objects + current_transformer = load_object( + file_path=self.data_transformation_artifact.transform_object_path + ) + current_model = load_object( + file_path=self.model_trainer_artifact.model_path + ) + current_target_encoder = load_object( + file_path=self.data_transformation_artifact.target_encoder_path + ) + + test_df = pd.read_csv(self.data_ingesiton_artifact.test_file_path) + target_df = test_df[TARGET_COLUMN] + + y_true = target_encoder.transform(target_df) + + # accuracy using previous trained model + + input_feature_name = list(transformer.feature_names_in_) + input_arr = transformer.transform(test_df[input_feature_name]) + + y_pred = current_model.predict(input_arr) + y_true = current_target_encoder.transform(target_df) + + + previous_model_score = f1_score( + y_true=y_true, y_pred=y_pred, average="weighted" + ) + + # accuracy using current model + input_feature_name = list(current_transformer.feature_names_in_) + input_arr = 
current_transformer.transform(test_df[input_feature_name]) + + y_pred = current_model.predict(input_arr) + y_true = current_target_encoder.transform(target_df) + + + current_model_score = f1_score( + y_true=y_true, y_pred=y_pred, average="weighted" + ) + + logging.info(f"Accuracy using current trained model: {current_model_score}") + + if current_model_score <= previous_model_score: + logging.info(f"Current trained model is not better than previous model") + raise Exception("Current trained model is not better than previous model") + + model_eval_artifact = artifact_entity.ModelEvaluationArtifact( + is_model_accepted=True, + improved_accuracy=current_model_score - previous_model_score, + ) + logging.info(f"Model Eval artifacts: {model_eval_artifact}") + + return model_eval_artifact + + except Exception as e: + raise CropException(e, sys) diff --git a/crop-recommendation/src/components/model_pusher.py b/crop-recommendation/src/components/model_pusher.py new file mode 100644 index 0000000000000000000000000000000000000000..720c0d7713b99ad53a436269f29a94cbd0c20e15 --- /dev/null +++ b/crop-recommendation/src/components/model_pusher.py @@ -0,0 +1,69 @@ +from src.entity.config_entity import ModelPusherConfig +from src.entity import artifact_entity +from src.predictor import ModelResolver +from src.exception import CropException +from src.logger import logging +from src.utils import load_object, save_object +from src.entity.artifact_entity import ( + DataTransformationArtifact, + ModelTrainerArtifact, + ModelPusherArtifact, +) +import sys +import os + + +class ModelPusher: + def __init__( + self, + model_pusher_config: ModelPusherConfig, + data_transformation_artifact: DataTransformationArtifact, + model_trainer_artifact: ModelTrainerArtifact, + ): + try: + logging.info(f"{'>'*20} Model Pusher Initiated {'<'*30}") + self.model_pusher_config = model_pusher_config + self.data_transformation_artifact = data_transformation_artifact + self.model_trainer_artifact = 
model_trainer_artifact + self.model_resolver = ModelResolver( + model_registry=self.model_pusher_config.saved_model_dir + ) + except Exception as e: + raise CropException(e, sys) + + def initiate_model_pusher(self) -> ModelPusherArtifact: + try: + # load object + logging.info(f"Loading transformer model and target encoder") + transformer = load_object(file_path=self.data_transformation_artifact.transform_object_path) + model = load_object(file_path=self.model_trainer_artifact.model_path) + target_encoder = load_object(file_path=self.data_transformation_artifact.target_encoder_path) + + # model pusher dir + logging.info(f"Saving model into model pusher directory") + save_object(file_path=self.model_pusher_config.pusher_transformer_path,obj=transformer) + save_object(file_path=self.model_pusher_config.pusher_model_path, obj=model) + save_object(file_path=self.model_pusher_config.pusher_target_encoder_path, obj=target_encoder) + + + # saved model dir + logging.info(f"Saving model in saved model dir") + + transformer_path = self.model_resolver.get_latest_save_transformer_path() + model_path = self.model_resolver.get_latest_save_model_path() + target_encoder_path = self.model_resolver.get_latest_save_target_encoder_path() + + save_object(file_path=transformer_path, obj=transformer) + save_object(file_path=model_path, obj=model) + save_object(file_path=target_encoder_path, obj=target_encoder) + + model_pusher_artifact = ModelPusherArtifact( + pusher_model_dir=self.model_pusher_config.pusher_model_dir, + saved_model_dir=self.model_pusher_config.saved_model_dir, + ) + logging.info(f"Model Pusher artifact: {model_pusher_artifact}") + + return model_pusher_artifact + + except Exception as e: + raise CropException(e, sys) diff --git a/crop-recommendation/src/components/model_trainer.py b/crop-recommendation/src/components/model_trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..36b507e76c72e80a8851b9487ce71fa4075e0b89 --- /dev/null +++ 
b/crop-recommendation/src/components/model_trainer.py @@ -0,0 +1,107 @@ +from src.entity import config_entity +from src.entity import artifact_entity +from src.logger import logging +from src.exception import CropException +from src import utils + +from typing import Optional +from sklearn.metrics import f1_score +from sklearn.ensemble import RandomForestClassifier +import os +import sys + + +class ModelTrainer: + def __init__( + self, + model_trainer_config: config_entity.ModelTrainerConfig, + data_transformation_artifact: artifact_entity.DataTransformationArtifact, + ): + try: + logging.info(f"{'>'*30} Model Trainer Initiated {'<'*30}") + self.model_trainer_config = model_trainer_config + self.data_transformation_artifact = data_transformation_artifact + + except Exception as e: + raise CropException(e, sys) + + def train_model(self, X, y): + try: + random_forest = RandomForestClassifier() + random_forest.fit(X, y) + + return random_forest + + except Exception as e: + raise CropException(e, sys) + + def initiate_model_trainer(self) -> artifact_entity.ModelTrainerArtifact: + try: + logging.info(f"Loading train and test array") + train_arr = utils.load_numpy_array_data( + file_path=self.data_transformation_artifact.transformed_train_path + ) + test_arr = utils.load_numpy_array_data( + file_path=self.data_transformation_artifact.transformed_test_path + ) + + logging.info( + f"Splitting input and target feature from both train and test arr. 
" + ) + X_train, y_train = train_arr[:, :-1], train_arr[:, -1] + X_test, y_test = test_arr[:, :-1], test_arr[:, -1] + + logging.info(f"Training the model") + model = self.train_model(X=X_train, y=y_train) + + logging.info(f"Calculating f1 train scrore") + yhat_train = model.predict(X_train) + f1_train_score = f1_score( + y_true=y_train, y_pred=yhat_train, average="weighted" + ) + + logging.info(f"Calculating f1 test score") + yhat_test = model.predict(X_test) + f1_test_score = f1_score( + y_true=y_test, y_pred=yhat_test, average="weighted" + ) + + logging.info( + f"train_score: {f1_train_score} and test score: {f1_test_score}" + ) + + # checking for overfitting or underfitting or expected score + logging.info(f"Checking if out model is underfitting or not") + if f1_test_score < self.model_trainer_config.expected_score: + raise Exception( + f"Model is not good as it is not able to give \ + expected accuracy: {self.model_trainer_config.expected_score}, model actual score: {f1_test_score}" + ) + + logging.info(f"Checking if our model is overfitting or not") + diff = abs(f1_train_score - f1_test_score) + + if diff > self.model_trainer_config.overfitting_threshold: + raise Exception( + f"Train and test score diff: {diff} \ + is more than overfitting threshold: {self.model_trainer_config.overfitting_threshold}" + ) + + # save the trained model + logging.info(f"Saving model object") + utils.save_object(file_path=self.model_trainer_config.model_path, obj=model) + + # prepare artifact + logging.info(f"Prepare the artifact") + model_trainer_artifact = artifact_entity.ModelTrainerArtifact( + model_path=self.model_trainer_config.model_path, + f1_train_score=f1_train_score, + f2_test_score=f1_test_score, + ) + + logging.info(f"Model trainer artifact: {model_trainer_artifact}") + + return model_trainer_artifact + + except Exception as e: + raise CropException(e, sys) diff --git a/crop-recommendation/src/config.py b/crop-recommendation/src/config.py new file mode 100644 index 
0000000000000000000000000000000000000000..ead80bdc4da3ec8e4cf938862c6c299bad221a4d --- /dev/null +++ b/crop-recommendation/src/config.py @@ -0,0 +1,20 @@ +import pymongo +import pandas as pd +import json +from dataclasses import dataclass +import os +from dotenv import load_dotenv + +load_dotenv() + + +@dataclass +class EnvironmentVariable: + mongo_db_url = os.getenv("MONGO_URL") + + +env = EnvironmentVariable() + +mongo_client = pymongo.MongoClient(env.mongo_db_url) + +TARGET_COLUMN = "label" diff --git a/crop-recommendation/src/entity/__init__.py b/crop-recommendation/src/entity/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/crop-recommendation/src/entity/artifact_entity.py b/crop-recommendation/src/entity/artifact_entity.py new file mode 100644 index 0000000000000000000000000000000000000000..bccdbaed6731b84856e58496612a3d941a4b127f --- /dev/null +++ b/crop-recommendation/src/entity/artifact_entity.py @@ -0,0 +1,40 @@ +from dataclasses import dataclass + + +@dataclass +class DataIngestionArtifact: + feature_store_file_path: str + train_file_path: str + test_file_path: str + + +@dataclass +class DataValidationArtifact: + report_file_path: str + + +@dataclass +class DataTransformationArtifact: + transform_object_path: str + transformed_train_path: str + transformed_test_path: str + target_encoder_path: str + + +@dataclass +class ModelTrainerArtifact: + model_path: str + f1_train_score: float + f2_test_score: float + + +@dataclass +class ModelEvaluationArtifact: + is_model_accepted: bool + improved_accuracy: float + + +@dataclass +class ModelPusherArtifact: + pusher_model_dir: str + saved_model_dir: str diff --git a/crop-recommendation/src/entity/config_entity.py b/crop-recommendation/src/entity/config_entity.py new file mode 100644 index 0000000000000000000000000000000000000000..09cc331cf215fdcf8985da98471b78fc263cd420 --- /dev/null +++ 
b/crop-recommendation/src/entity/config_entity.py @@ -0,0 +1,120 @@ +import os +import sys +from src.exception import CropException +from src.logger import logging +from datetime import datetime + +FILE_NAME = "crop.csv" +TRAIN_FILE_NAME = "train.csv" +TEST_FILE_NAME = "test.csv" +TRANSFORMER_OBJECT_FILE_NAME = "transformer.pkl" +TARGET_ENCODER_OBJECT_FILE_NAME = "target_encoder.pkl" +MODEL_FILE_NAME = "model.pkl" + + +class TrainingPipelineConfig: + def __init__(self): + try: + self.artifact_dir = os.path.join( + os.getcwd(), "artifact", f"{datetime.now().strftime('%m%d%Y__%H%M%S')}" + ) + except Exception as e: + raise CropException(e, sys) + + +class DataIngestionConfig: + def __init__(self, training_pipeline_config: TrainingPipelineConfig): + try: + self.database_name = "smartcropguard" + self.collection_name = "crop" + self.data_ingestion_dir = os.path.join( + training_pipeline_config.artifact_dir, "data_ingestion" + ) + self.feature_store_file_path = os.path.join( + self.data_ingestion_dir, "feature_store", FILE_NAME + ) + self.train_file_path = os.path.join( + self.data_ingestion_dir, "dataset", TRAIN_FILE_NAME + ) + self.test_file_path = os.path.join( + self.data_ingestion_dir, "dataset", TEST_FILE_NAME + ) + self.test_size = 0.2 + except Exception as e: + raise CropException(e, sys) + + def to_dict(self) -> dict: + try: + return self.__dict__ + except Exception as e: + raise CropException(e, sys) + + +class DataValidationConfig: + def __init__(self, training_pipeline_config: TrainingPipelineConfig): + self.data_validation_dir = os.path.join( + training_pipeline_config.artifact_dir, "data_validation" + ) + self.report_file_path = os.path.join(self.data_validation_dir, "report.yaml") + self.missing_threshold = 0.2 + self.base_file_path = os.path.join( + "crop-recommendation-dataset/Crop_recommendation.csv" + ) + + +class DataTransformationConfig: + def __init__(self, training_pipeline_config: TrainingPipelineConfig): + self.data_transformation_dir = 
os.path.join( + training_pipeline_config.artifact_dir, "data_transformation" + ) + self.transform_object_path = os.path.join( + self.data_transformation_dir, + "transformer", + TRANSFORMER_OBJECT_FILE_NAME + ) + self.transformed_train_path = os.path.join( + self.data_transformation_dir, + "transformed", + TRAIN_FILE_NAME.replace("csv", "npz"), + ) + self.transformed_test_path = os.path.join( + self.data_transformation_dir, + "transformed", + TEST_FILE_NAME.replace("csv", "npz"), + ) + self.target_encoder_path = os.path.join( + self.data_transformation_dir, + "target_encoder", + TARGET_ENCODER_OBJECT_FILE_NAME, + ) + + +class ModelTrainerConfig: + def __init__(self, training_pipeline_config: TrainingPipelineConfig): + self.model_trainer_dir = os.path.join( + training_pipeline_config.artifact_dir, "model_trainer" + ) + self.model_path = os.path.join(self.model_trainer_dir, "model", MODEL_FILE_NAME) + self.expected_score = 0.9 + self.overfitting_threshold = 0.1 + + +class ModelEvaluationConfig: + def __init__(self, training_pipeline_config: TrainingPipelineConfig): + self.change_threshold = 0.01 + + +class ModelPusherConfig: + def __init__(self, training_pipeline_config: TrainingPipelineConfig): + self.model_pusher_dir = os.path.join( + training_pipeline_config.artifact_dir, "model_pusher" + ) + self.saved_model_dir = os.path.join("saved_models") + self.pusher_model_dir = os.path.join(self.model_pusher_dir, "saved_models") + self.pusher_model_path = os.path.join(self.pusher_model_dir, MODEL_FILE_NAME) + self.pusher_transformer_path = os.path.join( + self.pusher_model_dir, TRANSFORMER_OBJECT_FILE_NAME + ) + self.pusher_target_encoder_path = os.path.join( + self.pusher_model_dir, TARGET_ENCODER_OBJECT_FILE_NAME + ) diff --git a/crop-recommendation/src/exception.py b/crop-recommendation/src/exception.py new file mode 100644 index 0000000000000000000000000000000000000000..8c8899fddf491aafd4a59ed3209862647cc08ef7 --- /dev/null +++ b/crop-recommendation/src/exception.py @@ 
def error_message_detail(error, error_detail: sys):
    """Build a message describing ``error`` and where it was caught.

    Args:
        error: The original exception (or message); rendered with ``str()``.
        error_detail: The ``sys`` module; only ``exc_info()`` is used.

    Returns:
        A string containing the script name and line number recorded on the
        traceback returned by ``exc_info()``, followed by the error text.
        When no exception is currently being handled there is no traceback,
        so only the error text is reported.
    """
    _, _, exc_tb = error_detail.exc_info()
    if exc_tb is None:
        # Constructed outside an ``except`` block: nothing to locate.
        return "Error occurred error message [{0}]".format(str(error))

    file_name = exc_tb.tb_frame.f_code.co_filename
    return "Error occurred python script name [{0}] line number [{1}] error message [{2}]".format(
        file_name, exc_tb.tb_lineno, str(error)
    )


class CropException(Exception):
    """Project exception whose text includes the location of the wrapped error."""

    def __init__(self, error_message, error_detail: sys):
        """Wrap ``error_message`` using traceback details from ``error_detail``.

        Args:
            error_message: The caught exception or a plain message.
            error_detail: The ``sys`` module, used to read ``exc_info()``.
        """
        self.error_message = error_message_detail(
            error_message, error_detail=error_detail
        )
        # Initialise the base class with the formatted text so ``args`` and
        # ``repr()`` carry the message instead of the raw constructor
        # arguments (which would include the ``sys`` module).
        super().__init__(self.error_message)

    def __str__(self):
        return self.error_message
CropException +from src.utils import get_collection_as_dataframe +from src.entity import config_entity +from src.entity import artifact_entity +import sys +from src.components.data_ingestion import DataIngestion +from src.components.data_validation import DataValidation +from src.components.data_trasformation import DataTransformation +from src.components.model_trainer import ModelTrainer +from src.components.model_evaluation import ModelEvaluation +from src.components.model_pusher import ModelPusher + + +def start_training_pipeline(): + try: + training_pipeline_config = config_entity.TrainingPipelineConfig() + + # data ingestion + data_ingestion_config = config_entity.DataIngestionConfig( + training_pipeline_config=training_pipeline_config + ) + data_ingestion_config.to_dict() + + data_ingestion = DataIngestion(data_ingestion_config=data_ingestion_config) + data_ingestion_artifact = data_ingestion.initiate_data_ingestion() + + print(f"Data Ingestion complete") + + # data validation + data_validation_config = config_entity.DataValidationConfig( + training_pipeline_config=training_pipeline_config + ) + + data_validation = DataValidation( + data_validation_config=data_validation_config, + data_ingestion_artifact=data_ingestion_artifact, + ) + + data_validation.initiate_data_validation() + print(f"Data Validation Complete") + + # data transformation + data_transformation_config = config_entity.DataTransformationConfig( + training_pipeline_config=training_pipeline_config + ) + + data_transformation = DataTransformation( + data_transformation_config=data_transformation_config, + data_ingestion_artifact=data_ingestion_artifact, + ) + + data_transformation_artifact = ( + data_transformation.initiate_data_transformation() + ) + print(f"Data Transformation Complete") + + # model trainer + model_trainer_config = config_entity.ModelTrainerConfig( + training_pipeline_config=training_pipeline_config + ) + + model_trainer = ModelTrainer( + 
class ModelResolver:
    """Resolve versioned artifact paths inside a model registry directory.

    Each saved version lives in a sub-directory of ``model_registry`` whose
    name is an integer; the highest number is treated as the latest version
    and ``latest + 1`` is used for the next save location.
    """

    def __init__(
        self,
        model_registry: str = "saved_models",
        transformer_dir_name="transformer",
        target_encoder_dir_name="target_encoder",
        model_dir_name="model",
    ):
        """Remember the registry layout and create the registry directory.

        Args:
            model_registry: Directory holding the numbered version folders.
            transformer_dir_name: Sub-folder name for the transformer object.
            target_encoder_dir_name: Sub-folder name for the target encoder.
            model_dir_name: Sub-folder name for the model object.
        """
        self.model_registry = model_registry
        os.makedirs(self.model_registry, exist_ok=True)

        self.transformer_dir_name = transformer_dir_name
        self.target_encoder_dir_name = target_encoder_dir_name
        self.model_dir_name = model_dir_name

    def get_latest_dir_path(self) -> Optional[str]:
        """Return the highest-numbered version directory, or ``None`` if none exists.

        Entries whose names are not plain decimal numbers (for example
        ``.gitkeep``) are ignored instead of aborting the lookup.

        Raises:
            CropException: If the registry directory cannot be listed.
        """
        try:
            versions = [
                int(name)
                for name in os.listdir(self.model_registry)
                if name.isdecimal()
            ]
            if not versions:
                return None
            return os.path.join(self.model_registry, f"{max(versions)}")
        except Exception as e:
            raise CropException(e, sys)

    def _latest_path(self, sub_dir: str, file_name: str, missing_message: str) -> str:
        """Join ``sub_dir/file_name`` onto the latest version directory."""
        try:
            latest_dir = self.get_latest_dir_path()
            if latest_dir is None:
                raise Exception(missing_message)
            return os.path.join(latest_dir, sub_dir, file_name)
        except Exception as e:
            raise CropException(e, sys)

    def _next_save_path(self, sub_dir: str, file_name: str) -> str:
        """Join ``sub_dir/file_name`` onto the next version directory."""
        try:
            return os.path.join(self.get_latest_save_dir_path(), sub_dir, file_name)
        except Exception as e:
            raise CropException(e, sys)

    def get_latest_model_path(self):
        """Return the model file path of the latest version.

        Raises:
            CropException: If no version directory exists yet.
        """
        return self._latest_path(
            self.model_dir_name, MODEL_FILE_NAME, "Model is not available"
        )

    def get_latest_transformer_path(self):
        """Return the transformer file path of the latest version.

        Raises:
            CropException: If no version directory exists yet.
        """
        return self._latest_path(
            self.transformer_dir_name,
            TRANSFORMER_OBJECT_FILE_NAME,
            "Transformer is not availabel",
        )

    def get_latest_target_encoder_path(self):
        """Return the target-encoder file path of the latest version.

        Raises:
            CropException: If no version directory exists yet.
        """
        return self._latest_path(
            self.target_encoder_dir_name,
            TARGET_ENCODER_OBJECT_FILE_NAME,
            "Target encoder is not available",
        )

    def get_latest_save_dir_path(self):
        """Return the directory the next version should be saved to.

        This is ``<registry>/0`` when no version exists, otherwise the latest
        version number plus one. The directory itself is not created here.
        """
        try:
            latest_dir = self.get_latest_dir_path()
            if latest_dir is None:
                return os.path.join(self.model_registry, f"{0}")
            latest_dir_num = int(os.path.basename(latest_dir))
            return os.path.join(self.model_registry, f"{latest_dir_num + 1}")
        except Exception as e:
            raise CropException(e, sys)

    def get_latest_save_model_path(self):
        """Return the model file path inside the next version directory."""
        return self._next_save_path(self.model_dir_name, MODEL_FILE_NAME)

    def get_latest_save_transformer_path(self):
        """Return the transformer file path inside the next version directory."""
        return self._next_save_path(
            self.transformer_dir_name, TRANSFORMER_OBJECT_FILE_NAME
        )

    def get_latest_save_target_encoder_path(self):
        """Return the target-encoder file path inside the next version directory."""
        return self._next_save_path(
            self.target_encoder_dir_name, TARGET_ENCODER_OBJECT_FILE_NAME
        )
def save_numpy_array_data(file_path: str, array: np.array):
    """Write ``array`` to ``file_path`` using ``np.save``.

    Args:
        file_path: Destination file; missing parent directories are created.
        array: The numpy array to store.

    Raises:
        CropException: If the directory cannot be created or the write fails.
    """
    try:
        dir_path = os.path.dirname(file_path)
        # A bare file name has no directory part; makedirs("") would fail.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)

        with open(file_path, "wb") as file_obj:
            np.save(file_obj, array)

    except Exception as e:
        raise CropException(e, sys)
"cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pymongo import MongoClient\n", + "from dotenv import load_dotenv\n", + "import os\n", + "\n", + "load_dotenv()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['crop-data',\n", + " 'diseases-data',\n", + " 'fertilizer-data',\n", + " 'inventory',\n", + " 'mongotest',\n", + " 'my_info',\n", + " 'smartcropguard',\n", + " 'taskdb',\n", + " 'admin',\n", + " 'local']" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mongodb_uri = os.getenv(\"MONGO_URL\")\n", + "\n", + "client = MongoClient(mongodb_uri)\n", + "client.list_database_names()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['images.chunks', 'crop-details', 'images.files']\n" + ] + } + ], + "source": [ + "smartcropguard = client[\"crop-data\"]\n", + "\n", + "collection_names = smartcropguard.list_collection_names()\n", + "print(collection_names)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['images.chunks', 'crop-details', 'images.files']" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "crop_data = client['crop-data']\n", + "crop_data_collection_names = crop_data.list_collection_names()\n", + "crop_data_collection_names " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/plant-diseases-classifier/.gitignore b/plant-diseases-classifier/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..75e98342f638a01a70113e96f15951834f590263 --- /dev/null +++ b/plant-diseases-classifier/.gitignore @@ -0,0 +1,134 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ +artifacts +demo.ipynb +new_demo.ipynb +logs +data_clean.py diff --git a/plant-diseases-classifier/.vscode/extensions.json b/plant-diseases-classifier/.vscode/extensions.json new file mode 100644 index 0000000000000000000000000000000000000000..e96063a38d5b575f4126c72f738c3ab58315031a --- /dev/null +++ b/plant-diseases-classifier/.vscode/extensions.json @@ -0,0 +1,9 @@ +{ + "recommendations": [ + "ms-python.python", + "ms-toolsai.jupyter", + "ms-toolsai.jupyter-keymap", + "ms-toolsai.jupyter-renderers", + "formulahendry.code-runner" + ] +} diff --git a/plant-diseases-classifier/.vscode/settings.json b/plant-diseases-classifier/.vscode/settings.json new file mode 100644 index 0000000000000000000000000000000000000000..7ecf6fd89f583d7735afa16d25efae8c4a8d4c0a --- /dev/null +++ b/plant-diseases-classifier/.vscode/settings.json @@ -0,0 +1,8 @@ +{ + "workbench.colorTheme": "Cobalt2", + "workbench.preferredDarkColorTheme": "Default Dark+", + "task.allowAutomaticTasks": "on", + "workbench.editorAssociations": { + "*.md": "vscode.markdown.preview.editor" + } +} diff --git a/plant-diseases-classifier/.vscode/tasks.json b/plant-diseases-classifier/.vscode/tasks.json new file mode 100644 index 0000000000000000000000000000000000000000..14c026db9918f46c14904820c59b2fbb68c32ce7 --- /dev/null +++ b/plant-diseases-classifier/.vscode/tasks.json @@ -0,0 +1,15 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Installing extensions and dependencies...", + "type": "shell", + "command": "code-server --install-extension ms-python.python --install-extension 
formulahendry.code-runner && pip install -r requirements.txt", + "presentation": { + "reveal": "always", + "panel": "new" + }, + "runOptions": { "runOn": "folderOpen" } + } + ] +} diff --git a/plant-diseases-classifier/LICENSE b/plant-diseases-classifier/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..2b1cfb0a20157f3e265b2cfecc105ab35d31b1af --- /dev/null +++ b/plant-diseases-classifier/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Rishav Dash + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/plant-diseases-classifier/README.md b/plant-diseases-classifier/README.md new file mode 100644 index 0000000000000000000000000000000000000000..33f5fd5df6a22a485e0591c91768e6b91aa0cb40 --- /dev/null +++ b/plant-diseases-classifier/README.md @@ -0,0 +1,40 @@ +# Automated Leaf Health Assessment + +### Evaluating leaf health couldn't be easier. Users can upload a leaf image, and the model swiftly determines whether the leaf is in good health or not. 
+ +## Disease Detection: +The model excels at identifying diverse leaf diseases. With training encompassing 38 distinct disease cases, it demonstrates robust detection capabilities across a wide range of plant health issues. + +## Application: +Try the application at huggingface space + +Application [link](https://huggingface.co/spaces/Sadashiv/CropGaurd) + +## Demo: +### Input Page +Image 1 + +### Output Page +Image 1 + +## Dataset: +The project utilizes an image dataset sourced from a Kaggle dataset, [link for dataset](https://www.kaggle.com/datasets/vipoooool/new-plant-diseases-dataset) + +However, to simplify retrieval, the dataset is stored within Hugging Face's platform. [link for dataset](https://huggingface.co/datasets/Sadashiv/Plant-Diseases-Dataset) + +## Technologies Used: +

Languages and Tools:

+

Image 1 + Image 2 + Image 2 + Image 2 + Image 2 + Image 2 + Image 2 + Image 2 +

+ +## Model Architecture: +The Yolov8 model architecture is employed for this project. + +[Link](https://github.com/ultralytics/ultralytics) for official github repository diff --git a/plant-diseases-classifier/custom_model_weights/best.pt b/plant-diseases-classifier/custom_model_weights/best.pt new file mode 100644 index 0000000000000000000000000000000000000000..b87e00feedb932fd470c4b78ff0a7b09c60e4b81 --- /dev/null +++ b/plant-diseases-classifier/custom_model_weights/best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60e259ede3ad072b0e4b16186e5ba00efc51eef51fbcdf9554f9ffd35d715f02 +size 10346016 diff --git a/plant-diseases-classifier/main.py b/plant-diseases-classifier/main.py new file mode 100644 index 0000000000000000000000000000000000000000..34c23e2e7985dd1154a8e9968efd8645f53838b2 --- /dev/null +++ b/plant-diseases-classifier/main.py @@ -0,0 +1,13 @@ +from src.pipeline.training_pipeline import TrainPipeline +from src.exception import PlantException +import sys +import os + +if __name__ =="__main__": + try: + + train_pipeline = TrainPipeline() + train_pipeline.run_pipeline() + + except Exception as e: + raise PlantException(e, sys) diff --git a/plant-diseases-classifier/requirements.txt b/plant-diseases-classifier/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7159e2f30c08887091aa5d807a78b87bf5cc3861 --- /dev/null +++ b/plant-diseases-classifier/requirements.txt @@ -0,0 +1,2 @@ +huggingface-hub +ultralytics \ No newline at end of file diff --git a/plant-diseases-classifier/setup.py b/plant-diseases-classifier/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/plant-diseases-classifier/src/__init__.py b/plant-diseases-classifier/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/plant-diseases-classifier/src/components/__init__.py b/plant-diseases-classifier/src/components/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/plant-diseases-classifier/src/components/data_ingestion.py b/plant-diseases-classifier/src/components/data_ingestion.py new file mode 100644 index 0000000000000000000000000000000000000000..92c07a7d3747e73db49150c2292b58524c56a893 --- /dev/null +++ b/plant-diseases-classifier/src/components/data_ingestion.py @@ -0,0 +1,65 @@ +import os +import sys +from six.moves import urllib +import zipfile +from src.exception import PlantException +from src.logger import logging +from src.entity.config_entity import DataIngestionConfig +from src.entity.artifact_entity import DataIngestionArtifact +from huggingface_hub import hf_hub_download +from tqdm import tqdm + + +class DataIngestion: + def __init__( + self, data_ingestion_config: DataIngestionConfig = DataIngestionConfig() + ): + try: + self.data_ingestion_config = data_ingestion_config + + except Exception as e: + raise PlantException(e, sys) + + def download_dataset(self): + # The path to the downloaded file in the cache. + print(f"Commencing the dataset download from the hub...") + logging.info(f"Commencing the dataset download from the hub...") + + filepath = hf_hub_download(repo_id=self.data_ingestion_config.huggingface_repo_id, + filename=self.data_ingestion_config.huggingface_file_name, + repo_type="dataset") + + # Create the destination directory if it doesn't exist. + os.makedirs(self.data_ingestion_config.feature_store_file_path, exist_ok=True) + + # Save the file to the specified location. 
class DataValidation:
    """Check that the ingested dataset directory has the expected top-level entries."""

    def __init__(
        self,
        data_ingestion_artifact: "DataIngestionArtifact",
        data_validation_config: "DataValidationConfig",
    ):
        """Store the ingestion artifact and the validation configuration."""
        try:
            self.data_ingestion_artifact = data_ingestion_artifact
            self.data_validation_config = data_validation_config

        except Exception as e:
            raise PlantException(e, sys)

    def validate_all_files_exist(self) -> bool:
        """Compare the dataset directory listing with the required entries.

        The status is computed once over the whole listing rather than being
        overwritten per entry: it is ``True`` only when every required entry
        is present and no other entry exists. The result is also written to
        the configured status file.

        Returns:
            ``True`` when the directory matches the required list, else ``False``.

        Raises:
            PlantException: If the directory cannot be listed or the status
                file cannot be written.
        """
        try:
            all_files = os.listdir(self.data_ingestion_artifact.dataset_path)
            logging.info(f"File format we got: {all_files}")

            required = self.data_validation_config.required_file_list
            missing = [name for name in required if name not in all_files]
            unexpected = [name for name in all_files if name not in required]
            validation_status = not missing and not unexpected

            if not validation_status:
                logging.info(
                    f"Missing entries: {missing}, unexpected entries: {unexpected}"
                )

            os.makedirs(self.data_validation_config.data_validation_dir, exist_ok=True)
            with open(self.data_validation_config.valid_status_file_dir, "w") as f:
                f.write(f"Validation status: {validation_status}")

            return validation_status

        except Exception as e:
            raise PlantException(e, sys)

    def initiate_data_validation(self) -> "DataValidationArtifact":
        """Run the validation and wrap the status in a ``DataValidationArtifact``.

        Raises:
            PlantException: If validation or artifact creation fails.
        """
        logging.info("Entered initiate_data_validation method of DataValidation class")
        try:
            status = self.validate_all_files_exist()
            data_validation_artifact = DataValidationArtifact(validation_status=status)

            logging.info("Exited initiate_data_validation method of DataValidation class")
            logging.info(f"Data validation artifact: {data_validation_artifact}")

            return data_validation_artifact

        except Exception as e:
            raise PlantException(e, sys)
# --- plant-diseases-classifier/src/components/model_trainer.py ---
import os
import sys
import shutil
import subprocess
from ultralytics import YOLO
from src.logger import logging
from src.exception import PlantException
from src.entity.config_entity import ModelTrainerConfig
from src.entity.artifact_entity import ModelTrainerArtifact
from src.entity.artifact_entity import DataIngestionArtifact


class ModelTrainer:
    """Train a YOLO classification model through the ``yolo`` command line tool."""

    def __init__(self, model_trainer_config: ModelTrainerConfig, data_ingestion_artifact: DataIngestionArtifact):
        """Store the training settings and the location of the ingested dataset.

        Args:
            model_trainer_config: Provides ``weight_name``, ``no_epochs``,
                ``batch_size``, ``patience`` and ``model_trainer_dir``.
            data_ingestion_artifact: Provides ``dataset_path``, passed to the
                trainer as its ``data=`` argument.
        """
        self.model_trainer_config = model_trainer_config
        self.data_ingestion_artifact = data_ingestion_artifact

    def initiate_model_trainer(self) -> ModelTrainerArtifact:
        """Run training and copy the best weights to the output locations.

        Steps: delete any ``runs`` directory left by an earlier run, invoke
        ``yolo task=classify mode=train ...``, then copy
        ``runs/classify/train/weights/best.pt`` into ``custom_model_weights/``
        and into the configured ``model_trainer_dir``.

        The ``try``/``except`` now lives inside the method. It used to wrap the
        ``def`` statements in the class body, where it could only catch errors
        raised while the class was being defined. The external command is run
        with ``check=True`` and the copies use ``shutil``, so a failed training
        run or a missing weights file raises instead of silently yielding an
        artifact that points at a file which does not exist.

        Returns:
            A ``ModelTrainerArtifact`` pointing at ``custom_model_weights/best.pt``.

        Raises:
            PlantException: Wraps a non-zero exit of the ``yolo`` command, a
                missing weights file, or any other error raised here.
        """
        try:
            logging.info(f"Removing and existing runs directory from previous training")
            shutil.rmtree("runs", ignore_errors=True)

            config = self.model_trainer_config
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact.
            train_command = [
                "yolo",
                "task=classify",
                "mode=train",
                f"model={config.weight_name}",
                f"data={self.data_ingestion_artifact.dataset_path}",
                f"epochs={config.no_epochs}",
                "imgsz=128",
                f"batch={config.batch_size}",
                f"patience={config.patience}",
            ]
            subprocess.run(train_command, check=True)

            best_weights = os.path.join("runs", "classify", "train", "weights", "best.pt")

            os.makedirs("custom_model_weights", exist_ok=True)
            shutil.copy(best_weights, "custom_model_weights")

            os.makedirs(config.model_trainer_dir, exist_ok=True)
            shutil.copy(best_weights, config.model_trainer_dir)

            model_trainer_artifact = ModelTrainerArtifact(trained_model_file_path="custom_model_weights/best.pt",)

            logging.info("Exited initiate_model_trainer method of ModelTrainer class")
            logging.info(f"Model trainer artifact: {model_trainer_artifact}")

            return model_trainer_artifact

        except Exception as e:
            raise PlantException(e, sys) from e


# --- plant-diseases-classifier/src/constant/__init__.py (empty file) ---
# --- plant-diseases-classifier/src/constant/application.py (empty file) ---

# --- plant-diseases-classifier/src/constant/training_pipeline/__init__.py ---
import os

# Root directory under which each pipeline run stores its artifacts.
ARTIFACTS_DIR: str = "artifacts"

# Data ingestion constants: names start with DATA_INGESTION (plus the Hugging
# Face source of the dataset archive).
DATA_INGESTION_DIR_NAME: str = "data_ingestion"

DATA_INGESTION_FEATURE_STORE_DIR: str = "feature_store"

HUGGINGFACE_REPO_ID: str = "Sadashiv/Plant-Diseases-Dataset"

FILE_NAME: str = "Plant-Diseases-Dataset.zip"

DATA_INGESTION_FILES: str = "dataset"

# Data validation constants: names start with DATA_VALIDATION.
DATA_VALIDATION_DIR_NAME: str = "data_validation"

DATA_VALIDATION_STATUS_FILE = 'status.txt'

# Entries that must exist at the top level of the extracted dataset.
DATA_VALIDATION_ALL_REQUIRED_FILES = ["val", "train"]

# Model trainer constants: names start with MODEL_TRAINER.
MODEL_TRAINER_DIR_NAME: str = "model_trainer"

MODEL_TRAINER_PRETRAINED_WEIGHT_NAME: str = "yolov8s-cls.pt"

MODEL_TRAINER_NO_EPOCHS: int = 5

MODEL_TRAINER_BATCH_SIZE: int = 16

MODEL_TRAINER_PATIENCE: int = 0

# --- plant-diseases-classifier/src/entity/__init__.py (empty file) ---
# --- plant-diseases-classifier/src/entity/artifact_entity.py ---
from dataclasses import dataclass


@dataclass
class DataIngestionArtifact:
    """Output of data ingestion: where the dataset and the downloaded archive live."""

    dataset_path: str
    feature_store_path: str


@dataclass
class DataValidationArtifact:
    """Output of data validation: whether the dataset passed."""

    validation_status: bool


@dataclass
class ModelTrainerArtifact:
    """Output of model training: path of the trained weights file."""

    trained_model_file_path: str


# --- plant-diseases-classifier/src/entity/config_entity.py ---
import os
from dataclasses import dataclass
from datetime import datetime
from src.constant.training_pipeline import *

# Evaluated once at import time, so every config object created in one process
# shares the same run directory.
# NOTE(review): %I is the 12-hour clock and no AM/PM marker is included, so a
# run at 01:05:07 and one at 13:05:07 on the same day map to the same name.
TIMESTAMP: str = datetime.now().strftime("%m_%d_%Y__%I_%M_%S")


@dataclass
class TrainingPipelineConfig:
    """Root artifact directory for the current run: ``ARTIFACTS_DIR/TIMESTAMP``."""

    artifacts_dir: str = os.path.join(ARTIFACTS_DIR, TIMESTAMP)


training_pipeline_config: TrainingPipelineConfig = TrainingPipelineConfig()


@dataclass
class DataIngestionConfig:
    """Paths and Hugging Face source settings used by data ingestion."""

    data_ingestion_dir: str = os.path.join(
        training_pipeline_config.artifacts_dir, DATA_INGESTION_DIR_NAME
    )

    feature_store_file_path: str = os.path.join(
        data_ingestion_dir, DATA_INGESTION_FEATURE_STORE_DIR
    )

    dataset_location: str = os.path.join(
        data_ingestion_dir, DATA_INGESTION_FILES
    )

    huggingface_repo_id: str = HUGGINGFACE_REPO_ID
    huggingface_file_name: str = FILE_NAME


@dataclass
class DataValidationConfig:
    """Paths and required-entry list used by data validation."""

    data_validation_dir: str = os.path.join(
        training_pipeline_config.artifacts_dir, DATA_VALIDATION_DIR_NAME
    )

    valid_status_file_dir: str = os.path.join(data_validation_dir, DATA_VALIDATION_STATUS_FILE)

    # Deliberately unannotated: this makes it a shared class attribute rather
    # than a dataclass field (a list default on a field would be rejected).
    required_file_list = DATA_VALIDATION_ALL_REQUIRED_FILES


@dataclass
class ModelTrainerConfig:
    """Output directory and hyper-parameters used by model training."""

    model_trainer_dir: str = os.path.join(
        training_pipeline_config.artifacts_dir, MODEL_TRAINER_DIR_NAME
    )

    # The names below carry no annotation, so they are plain class attributes,
    # not dataclass fields, and cannot be passed to the constructor.
    weight_name = MODEL_TRAINER_PRETRAINED_WEIGHT_NAME

    no_epochs = MODEL_TRAINER_NO_EPOCHS

    batch_size = MODEL_TRAINER_BATCH_SIZE

    patience = MODEL_TRAINER_PATIENCE


# --- plant-diseases-classifier/src/exception/__init__.py ---
import sys
import os


def error_message_detail(error, error_detail: sys):
    """Build a one-line description of the exception currently being handled.

    Args:
        error: The original exception (or message); rendered with ``str()``.
        error_detail: An object exposing ``exc_info()``; callers pass the
            ``sys`` module.

    Returns:
        A string naming the script and line number of the frame recorded in
        the active traceback, followed by the error text. When no exception is
        being handled (``exc_info()`` yields no traceback) the location fields
        read ``unknown`` instead of raising ``AttributeError``.
    """
    _, _, exc_tb = error_detail.exc_info()  # extract the information about the error
    if exc_tb is None:
        file_name, line_number = "unknown", "unknown"
    else:
        # get the file name where the error occurred
        file_name = exc_tb.tb_frame.f_code.co_filename
        line_number = exc_tb.tb_lineno
    # formatting the error msg
    error_message = f"Error occured python script name [{file_name}] line number [{line_number}] error message [{str(error)}]"
    return error_message


# custom Exception class that inherits from the built-in Exception class
class PlantException(Exception):
    """Project exception whose message records where the wrapped error occurred."""

    def __init__(self, error_message, error_detail: sys):
        """Format the message and initialise the base ``Exception`` with it.

        Args:
            error_message: The original exception (or message) being wrapped.
            error_detail: An object exposing ``exc_info()``; callers pass ``sys``.
        """
        self.error_message = error_message_detail(error_message, error_detail)
        # Populate Exception.args so repr(), logging and tracebacks show the text.
        super().__init__(self.error_message)

    def __str__(self):
        return self.error_message


# --- plant-diseases-classifier/src/logger/__init__.py ---
import logging
from datetime import datetime
import os
import sys
# --- plant-diseases-classifier/src/logger/__init__.py (continued; its imports precede this span) ---

# log file name
# NOTE(review): %I is the 12-hour clock with no AM/PM marker, so two runs
# twelve hours apart can produce the same file name.
LOG_FILE_NAME = f"{datetime.now().strftime('%m_%d_%Y__%I_%M_%S')}.log"

# directory name
LOG_FILE_DIR = os.path.join(os.getcwd(), "logs")

# create folder if not exists
os.makedirs(LOG_FILE_DIR, exist_ok=True)

# Log file path
LOG_FILE_PATH = os.path.join(LOG_FILE_DIR, LOG_FILE_NAME)

logging.basicConfig(
    filename=LOG_FILE_PATH,
    format="[ %(asctime)s ] %(filename)s: %(lineno)d %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# --- plant-diseases-classifier/src/pipeline/__init__.py (empty file) ---

# --- plant-diseases-classifier/src/pipeline/training_pipeline.py ---
import sys
import os
from typing import Optional
from src.exception import PlantException
from src.components.data_ingestion import DataIngestion
from src.components.data_validation import DataValidation
from src.entity.config_entity import DataIngestionConfig
from src.entity.config_entity import DataValidationConfig
from src.entity.config_entity import ModelTrainerConfig
from src.entity.artifact_entity import DataIngestionArtifact
from src.entity.artifact_entity import DataValidationArtifact
from src.entity.artifact_entity import ModelTrainerArtifact
from src.components.model_trainer import ModelTrainer
from src.logger import logging


class TrainPipeline:
    """Run data ingestion, data validation and model training in sequence."""

    def __init__(self):
        """Create the default configuration object for each stage."""
        self.data_ingestion_config = DataIngestionConfig()
        self.data_validation_config = DataValidationConfig()
        self.model_trainer_config = ModelTrainerConfig()

    def start_data_ingestion(self) -> DataIngestionArtifact:
        """Run the ingestion stage.

        Returns:
            The artifact produced by ``DataIngestion.initiate_data_ingestion``.

        Raises:
            PlantException: Wraps any error raised by the stage.
        """
        try:
            logging.info("Entered the start_data_ingestion method of TrainPipeline class")
            data_ingestion = DataIngestion(data_ingestion_config=self.data_ingestion_config)

            data_ingestion_artifact = data_ingestion.initiate_data_ingestion()

            logging.info("Exited the start_data_ingestion method of TrainPipeline class")

            return data_ingestion_artifact

        except Exception as e:
            raise PlantException(e, sys) from e

    def start_model_trainer(
        self, data_ingestion_artifact: Optional[DataIngestionArtifact] = None
    ) -> ModelTrainerArtifact:
        """Run the training stage.

        Args:
            data_ingestion_artifact: Artifact of an ingestion that has already
                run. When omitted, ingestion is run here first, which is what
                this method always did before the parameter existed.

        Returns:
            The artifact produced by ``ModelTrainer.initiate_model_trainer``.

        Raises:
            PlantException: Wraps any error raised by ingestion or training.
        """
        try:
            if data_ingestion_artifact is None:
                data_ingestion_artifact = self.start_data_ingestion()
            model_trainer = ModelTrainer(model_trainer_config=self.model_trainer_config,
                                         data_ingestion_artifact=data_ingestion_artifact)
            model_trainer_artifact = model_trainer.initiate_model_trainer()
            return model_trainer_artifact

        except Exception as e:
            raise PlantException(e, sys) from e

    def start_data_validation(self, data_ingestion_artifact: DataIngestionArtifact) -> DataValidationArtifact:
        """Run the validation stage on an ingested dataset.

        Args:
            data_ingestion_artifact: Artifact describing the dataset to validate.

        Returns:
            The artifact produced by ``DataValidation.initiate_data_validation``.

        Raises:
            PlantException: Wraps any error raised by the stage.
        """
        logging.info("Entered the start_data_validation method of TrainPipeline class")
        try:
            data_validation = DataValidation(data_ingestion_artifact=data_ingestion_artifact,
                                             data_validation_config=self.data_validation_config,)

            data_validation_artifact = data_validation.initiate_data_validation()

            logging.info("Performed the data validation operation")
            logging.info("Exited the start_data_validation method of TrainPipeline class")

            return data_validation_artifact

        except Exception as e:
            raise PlantException(e, sys) from e

    def run_pipeline(self) -> None:
        """Ingest, validate and, only if validation passes, train.

        The ingestion artifact that was validated is handed to the trainer, so
        the dataset is downloaded and extracted once per run instead of twice.

        Raises:
            PlantException: Wraps any stage failure, including the
                "Your data is not in correct format" error raised when
                validation does not pass.
        """
        try:
            data_ingestion_artifact = self.start_data_ingestion()
            data_validation_artifact = self.start_data_validation(data_ingestion_artifact=data_ingestion_artifact)

            if not data_validation_artifact.validation_status:
                raise Exception("Your data is not in correct format")

            model_trainer_artifact = self.start_model_trainer(
                data_ingestion_artifact=data_ingestion_artifact
            )
            logging.info(f"Training pipeline finished: {model_trainer_artifact}")

        except Exception as e:
            raise PlantException(e, sys) from e


# --- plant-diseases-classifier/src/utils/__init__.py (empty file) ---
# --- plant-diseases-classifier/src/utils/main_utils.py (empty file) ---

# --- plant-diseases-classifier/template.py ---
# Interactive scaffold: asks for a project name, then creates the package
# directories and empty module files listed below. Existing non-empty files
# are left untouched.
import os
import sys
from pathlib import Path
import logging

# The root logger defaults to WARNING; without this the INFO message emitted
# for files that already exist would never be shown.
logging.basicConfig(level=logging.INFO)

# Keep asking until a non-empty name is entered.
while True:
    project_name = input("Enter your project name: ")
    if project_name != "":
        break

# e.g. src/__init__.py
#      src/components/__init__.py
list_of_files = [
    f"{project_name}/__init__.py",
    f"{project_name}/components/__init__.py",
    f"{project_name}/components/data_ingestion.py",
    f"{project_name}/components/data_validation.py",
    f"{project_name}/components/model_trainer.py",
    f"{project_name}/components/model_pusher.py",
    f"{project_name}/constant/__init__.py",
    f"{project_name}/constant/application.py",
    f"{project_name}/constant/training_pipeline/__init__.py",
    f"{project_name}/entity/__init__.py",
    f"{project_name}/entity/artifact_entity.py",
    f"{project_name}/entity/config_entity.py",
    f"{project_name}/exception/__init__.py",
    f"{project_name}/logger/__init__.py",
    f"{project_name}/pipeline/__init__.py",
    f"{project_name}/pipeline/training_pipeline.py",
    f"{project_name}/utils/__init__.py",
    # Was listed under pipeline/; the repository keeps main_utils.py in utils/.
    f"{project_name}/utils/main_utils.py",
    "app.py",
    "main.py",
    "setup.py",
]


for filepth in list_of_files:
    filepath = Path(filepth)

    # Top-level files have "." as parent, which already exists.
    filepath.parent.mkdir(parents=True, exist_ok=True)

    if (not filepath.exists()) or (filepath.stat().st_size == 0):
        # Create the file if it is missing; an existing empty file stays empty.
        filepath.touch()

    else:
        logging.info(f"file is already present at : {filepath}")

# --- requirements.txt ---
# pymongo
# flask
# requests
# numpy
# python-dotenv
# scikit-learn
# dill
# ultralytics

# --- binary files added by this diff (no text content) ---
# static/images/crop-recommendation-logo.png
# static/images/farmer_01.jpg
# static/images/farmer_02.jpg
# static/images/farmer_03.jpg
# static/images/farmer_04.jpg
# static/images/farmer_05.jpg
# static/images/fertilizer-logo.png (entry continues on the following line)
100644 index 0000000000000000000000000000000000000000..8bcb87fd609bf47f92959b76c0e0f3dc71085dcd Binary files /dev/null and b/static/images/fertilizer-logo.png differ diff --git a/static/images/github-logo.png b/static/images/github-logo.png new file mode 100644 index 0000000000000000000000000000000000000000..ec469827a94a513c6d6876f7b8177ee337f98f18 Binary files /dev/null and b/static/images/github-logo.png differ diff --git a/static/images/image-classification-logo.png b/static/images/image-classification-logo.png new file mode 100644 index 0000000000000000000000000000000000000000..c303b76ed2c8f08a8e42c06bdcdaf465766f50c0 Binary files /dev/null and b/static/images/image-classification-logo.png differ diff --git a/static/images/india_location.png b/static/images/india_location.png new file mode 100644 index 0000000000000000000000000000000000000000..68273bb3df431d7678657cacfe806ebfff1b91ad Binary files /dev/null and b/static/images/india_location.png differ diff --git a/static/images/linkedin-icon.png b/static/images/linkedin-icon.png new file mode 100644 index 0000000000000000000000000000000000000000..6855d8b4dea2d5102c90074e252e92338ebdcc0a Binary files /dev/null and b/static/images/linkedin-icon.png differ diff --git a/static/images/logo.png b/static/images/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..025998ffcad6d8ab574b824877b7abc204767e65 Binary files /dev/null and b/static/images/logo.png differ diff --git a/static/images/market-price-logo.png b/static/images/market-price-logo.png new file mode 100644 index 0000000000000000000000000000000000000000..11c9d2b8e20842fafe41ec4fad33485c09876a23 Binary files /dev/null and b/static/images/market-price-logo.png differ diff --git a/static/images/wave1.png b/static/images/wave1.png new file mode 100644 index 0000000000000000000000000000000000000000..54372d71cca6e4e366b8dd6d3b82e7f56d4487ea Binary files /dev/null and b/static/images/wave1.png differ diff --git a/static/images/wave1_mod.png 
b/static/images/wave1_mod.png new file mode 100644 index 0000000000000000000000000000000000000000..9bded52afb6b1492ac4ed1a92a56216c9dca17b5 Binary files /dev/null and b/static/images/wave1_mod.png differ diff --git a/static/images/wave2.png b/static/images/wave2.png new file mode 100644 index 0000000000000000000000000000000000000000..a92a04087dd542e2a750468eeaef62aa0bee7bb9 Binary files /dev/null and b/static/images/wave2.png differ diff --git a/static/style.css b/static/style.css new file mode 100644 index 0000000000000000000000000000000000000000..5db0f87ee24f7b61ddf88363a7e7e0685051f349 --- /dev/null +++ b/static/style.css @@ -0,0 +1,272 @@ +body { + + padding: 0; + margin: 0; +} + +/*------------navigation bar ----------------*/ +#nav-bar { + position: sticky; + top: 0; + z-index: 10; +} + +.navbar { + background-image: linear-gradient(to right, #75ee71c7, #eceb84); + padding: 0 !important; +} + +.navbar-nav li { + padding: 0 10px; +} + +.navbar-nav li a { + font-weight: 600; +} + + +/*-----------------banner section----------*/ +#banner { + background-image: linear-gradient(to right, #75ee71c7, #eceb84); + padding-top: 5%; +} + +.promo-title { + font-size: 40px; + font-weight: 600; + margin-top: 100px; +} + +.img-fluid { + border-radius: 12px; +} + +.bottom-img { + width: 100%; +} + +/*-----------------Services section----------*/ +#services { + padding: 80px 0; +} + +.service-img { + width: 100px; + margin-top: 20px; +} + +.service-name { + text-align: center; + /*Center horizontally*/ +} + +.services { + padding: 20px; +} + +.title::before { + content: ""; + background: hsl(128, 84%, 66%); + height: 5px; + width: 200px; + margin-left: auto; + margin-right: auto; + display: block; + transform: translateY(63px); +} + +.title::after { + content: ""; + background: hsl(128, 84%, 66%); + height: 10px; + width: 50px; + margin-left: auto; + margin-right: auto; + margin-bottom: 40px; + display: block; + transform: translateY(8px); +} + +/*-----------------Services 
section----------*/ + +#about-us { + padding-bottom: 50px; + padding-top: 50px; +} + +#about-us ul li { + margin: 10px 0; +} + +/*-----------------quotes-------------------*/ +#quotes { + margin: 100px 0; +} + +.quotes { + border-left: 4px solid #f1f397e3; + margin-top: 50px; + margin-bottom: 50px; +} + +.quotes img { + height: 60px; + width: 60px; + border-radius: 50%; + margin: 0 10px; +} + +.user-details { + display: inline-block; + font-size: 12px; +} + +/*-----------------Social Media Section-------*/ +#social-media { + /* background-color: #D9F8C4; */ + padding: 100px 0; +} + +#social-media p { + font-size: 36px; + font-weight: 600; + margin-bottom: 30px; +} + +.social-icons img { + width: 120px; + transition: 0.5s; + padding: 20px; +} + +.social-icons a:hover img { + transform: translateY(-10px); +} + +/*---------Footer Section -------------------*/ + +#footer { + background-image: linear-gradient(to right, #75ee71c7, #eceb84); + color: rgb(106, 110, 110) +} + +#footer img { + width: 100%; +} + +.footer-box { + padding: 20px; +} + +#footer .footer-box img { + width: 30%; + margin-bottom: 20px; + border-radius: 5%; +} + +#footer .footer-box .addition-info { + display: inline-block; + font-size: 20px; +} + +/*-----------recommendation input -----*/ +.recommendation-input-container { + max-width: 800px; + /* Set the maximum width of the container */ + margin: 60px auto 0; + /* Center the container horizontally and add 60px top margin */ + display: flex; + align-items: center; + background-color: white; + padding: 20px; + border-radius: 15px; + /* Add 15px border-radius to all corners */ +} + +/*-----------image classification -----*/ +.image_input { + width: 50%; + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + background-color: white; + padding: 30px; + border-radius: 20px; +} + +/*-----------Market Input -----------*/ +.market_input { + width: 40%; + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, 
-50%); + background-color: white; + padding: 15px; + border-radius: 20px; +} + + +/*-------Crop Recommendation Output---*/ +.crop-image { + background-color: white; + padding: 30px; + border-radius: 20px; + } + .crop-image img { + border-radius: 20px; + width: 100%; + } + .crop-image .text h1 { + font-size: 35px; + margin-bottom: 20px; + } + + .crop-image .text p { + font-size: 18px; + } + + .crop-image a{ + margin-top: 20px; + } + .crop-image h2{ + margin-bottom: 30px; + } + + +/*-------Image Classification output page---*/ + .plant-image { + background-color: white; + padding: 30px; + border-radius: 20px; + width: 60%; + } + .plant-image img { + border-radius: 20px; + width: 70%; + margin: 0 auto; + display: block; + } + + .plant-image a{ + margin-top: 20px; + + } + .plant-image h2{ + margin-bottom: 30px; + } + + + /*-------market price output message---*/ + .market_price_message{ + background-color: white; + padding: 20px; + border-radius: 15px; + width: 70%; + margin-top: 5%; +} + +.market_price_message a{ + margin-top: 10px; +} \ No newline at end of file diff --git a/static/uploaded_image/plant_image.JPG b/static/uploaded_image/plant_image.JPG new file mode 100644 index 0000000000000000000000000000000000000000..f3145cac8a3735af7f18fac6e67e563613e026fa Binary files /dev/null and b/static/uploaded_image/plant_image.JPG differ diff --git a/templates/base.html b/templates/base.html new file mode 100644 index 0000000000000000000000000000000000000000..b3493c12db2aa540270c668ba68a1165c319d813 --- /dev/null +++ b/templates/base.html @@ -0,0 +1,17 @@ + + + + + + CropGaurd + + + + + + {% block body %} {% endblock %} + + diff --git a/templates/crop_recommendation_input.html b/templates/crop_recommendation_input.html new file mode 100644 index 0000000000000000000000000000000000000000..0a0e378a56282080c77f991853c7e706848996c0 --- /dev/null +++ b/templates/crop_recommendation_input.html @@ -0,0 +1,96 @@ +{% extends 'base.html' %} {% block body %} {% include "nav_bar.html" 
%} + +
+
+

AI-Powered Crop Recommendations

+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+
+{% endblock %} diff --git a/templates/crop_recommendation_output.html b/templates/crop_recommendation_output.html new file mode 100644 index 0000000000000000000000000000000000000000..2cc0654ea221dfa94acae5c0e02c000dbaa5857e --- /dev/null +++ b/templates/crop_recommendation_output.html @@ -0,0 +1,38 @@ +{% extends 'base.html' %} {% block body %} {% include "nav_bar.html" %} + + +
+
+
+

Crop Recommendation

+
+ Image +
+
+

{{input_file_name.capitalize()}}

+

{{crop_details}}

+
+
+ Check with Different Values +
+
+ +
+ +{% endblock %} \ No newline at end of file diff --git a/templates/fertilizer_recommendation_input.html b/templates/fertilizer_recommendation_input.html new file mode 100644 index 0000000000000000000000000000000000000000..354b79e8dd6b74a41a7b4addfd22a8002003aee4 --- /dev/null +++ b/templates/fertilizer_recommendation_input.html @@ -0,0 +1,108 @@ +{% extends 'base.html' %} {% block body %} {% include "nav_bar.html" %} + +
+
+

AI-Powered Fertilizer Recommendations

+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+
+ +{% endblock %} diff --git a/templates/fertilizer_recommendation_ouput.html b/templates/fertilizer_recommendation_ouput.html new file mode 100644 index 0000000000000000000000000000000000000000..017298e9669aab9ef422ae334b240777bc54c4f7 --- /dev/null +++ b/templates/fertilizer_recommendation_ouput.html @@ -0,0 +1,37 @@ +{% extends 'base.html' %} {% block body %} {% include "nav_bar.html" %} + +
+
+
+

Fertilizer Recommendation

+
+ Image +
+
+

{{label.upper()}}

+

{{fertilizer_details}}

+
+
+ Check with Different Values +
+
+
+
+ +{% endblock %} \ No newline at end of file diff --git a/templates/image_classification_input.html b/templates/image_classification_input.html new file mode 100644 index 0000000000000000000000000000000000000000..6a6e7f3bce70753cb5430ed8f121a903ed42e842 --- /dev/null +++ b/templates/image_classification_input.html @@ -0,0 +1,33 @@ +{% extends 'base.html' %} {% block body %} {% include "nav_bar.html" %} + + +
+
+
+

Please upload the image

+
+ +
+
+ + +
+
+
+
+{% endblock %} diff --git a/templates/image_classification_output.html b/templates/image_classification_output.html new file mode 100644 index 0000000000000000000000000000000000000000..8ad4e67b2f17d5ba6197de43ffeae475aab30a7c --- /dev/null +++ b/templates/image_classification_output.html @@ -0,0 +1,38 @@ +{% extends 'base.html' %} {% block body %} {% include "nav_bar.html" %} + +
+
+
+

Image Classification

+
+ Image +
+
+

{{model_prediction}}

+

{{diseases_details}}

+
+
+ Check with different image +
+
+
+
+{%endblock %} \ No newline at end of file diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000000000000000000000000000000000000..a170d56f1695dc7558068bb1cbadd4a4274c55f4 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,325 @@ +{% extends 'base.html' %} {% block body %} {% include "nav_bar.html" %} + + + +
+
+

What We Do?

+
+
+ +

Crop
Recommendation

+
+
+ +

Fertilizer
Recommendation

+
+
+ +

Diseases
Classification

+
+
+ +

Market
Price

+
+
+
+
+ +
+
+

Why We Do It.

+
+
+
    +
  • + The purpose of our website is to assist farmers in making better + decisions for their crops. +
  • +
  • + Every soil has unique fertilizer requirements and suits specific + crops, making personalized recommendations essential. +
  • +
  • + By harnessing the power of technology, we aim to bridge the + gap between agricultural practices and data-driven insights. +
  • +
  • + We aim to provide a comprehensive one-stop solution to cater to all + the needs of farmers. +
  • +
+
+
+ +
+
+
+
+ +
+
+

How We Do It.

+
+
+
    +
  • + Our website integrates cutting-edge machine learning and deep + learning algorithms to facilitate informed decision-making. +
  • +
  • + These algorithms are trained on vast and diverse datasets, ensuring + accurate and reliable recommendations. +
  • +
  • + We leverage the power of data-driven insights to deliver valuable + suggestions to farmers. +
  • +
+
+
+ +
+
+
+
+ +
+
+

What We Do?

+
+
+
    +
  • + Our website is a farmer-centric platform that offers a range + of services based on user inputs. +
  • +
  • + Crop Recommendation: Using soil analysis and other relevant + factors, we suggest the most suitable crops for a specific plot of + land. +
  • +
  • + Fertilizer Recommendation: We provide tailored fertilizer + suggestions based on the soil's unique needs and crop choice. +
  • +
  • + Plant Health Assessment: Our computer vision technology + analyzes leaf images to determine the plant's health status and + identify potential diseases. +
  • +
  • + Market Price Information: Farmers can access current market + prices for their crops using data fetched from the Indian + government's API. +
  • +
+
+
+ +
+
+
+
+ +
+
+

How It Benefits Farmers?

+
+
+
    +
  • + Empowering Decision Making: By receiving personalized + recommendations, farmers can make informed choices for their crops + and fertilizers. +
  • +
  • + Crop Yield: Tailored suggestions lead to optimal crop choices + and better fertilizer usage, resulting in increased yields. +
  • +
  • + Early Disease Detection: Our plant health assessment helps + detect diseases at an early stage, allowing farmers to take timely + action and prevent losses. +
  • +
  • + Market Awareness: Access to real-time market prices enables + farmers to sell their produce at the most favorable rates. +
  • +
+
+
+ +
+
+
+
+ +
+
+

Quotes

+
+
+

+ Agriculture is knowledge, it is skill, and it is the instrument of the + economic transformation of the country. +

+ +

Mahatma Gandhi

+
+
+

+ We must harness the power of technology and science to revolutionize + agriculture and improve the lives of farmers. +

+ +

+ A. P. J. Abdul Kalam
The former President of India +

+
+
+
+
+ +
+

Services

+
+
+
+
+ ... +
+
Crop Recommendation
+

+ Harvesting Insights: Transforming Farming with AI-Driven Crop Recommendations +

+ Visit Page +
+
+
+
+
+ ... +
+
Fertilizer Recommendation
+

+ Fertile Future: Enhancing Agriculture with AI-Driven Fertilizer Recommendations +

+ Visit Page +
+
+
+
+
+ ... +
+
Diseases Classification
+

+ Sight to Insight: Advancing Farming through Image-Based Disease Identification +

+ Visit Page +
+
+
+
+
+ ... +
+
Market Price
+

+ Price Insights: Commodity Markets in Indian States with Real-time Government Data +

+ Visit Page +
+
+
+
+
+
+ +
+
+

Connect on Social Media

+ +
+
+ + +{% endblock %} diff --git a/templates/input.html b/templates/input.html new file mode 100644 index 0000000000000000000000000000000000000000..c5f3122c3dbc9adb6151ed56a7df8a0f515823d9 --- /dev/null +++ b/templates/input.html @@ -0,0 +1,26 @@ +{% extends 'base.html' %} {% block body %} {% include "nav_bar.html" %} + +
+ +
+

This is input page

+ + +
+
+ + +
+
+
+{% endblock %} \ No newline at end of file diff --git a/templates/market_price_input.html b/templates/market_price_input.html new file mode 100644 index 0000000000000000000000000000000000000000..feb9d601994bf733909ae01e41c48b102071d68e --- /dev/null +++ b/templates/market_price_input.html @@ -0,0 +1,68 @@ +{% extends 'base.html' %} {% block body %} {% include "nav_bar.html" %} + + +
+
+

Please select the state

+
+ + +
Please select a valid state.
+
+
+ + +
+
+
+{% endblock %} diff --git a/templates/market_price_no_data.html b/templates/market_price_no_data.html new file mode 100644 index 0000000000000000000000000000000000000000..3d08838c76aa2ee9ff393ba51ffeea5b545544ae --- /dev/null +++ b/templates/market_price_no_data.html @@ -0,0 +1,20 @@ +{% extends 'base.html' %} {% block body %} {% include "nav_bar.html" %} + + +
+
Commodity prices for the selected state are currently unavailable from the Government API. + Please consider trying again later or selecting a different state.
+ +
+ +{% endblock %} \ No newline at end of file diff --git a/templates/market_price_output.html b/templates/market_price_output.html new file mode 100644 index 0000000000000000000000000000000000000000..2710bb4b5ace7239c404262c32ee468d3ebcf307 --- /dev/null +++ b/templates/market_price_output.html @@ -0,0 +1,44 @@ +{% extends 'base.html' %} {% block body %} {% include "nav_bar.html" %} + +
+

Commodity Prices

+ + + {% for header in data[0] %} + + {% endfor %} + + {% set data_length = data|length %} {% for index_value in range(data_length) + %} + + {% for row in data[index_value].values() %} + + {% endfor %} + + {% endfor %} +
{{header|upper}}
{{row}}
+ + +
+{% endblock %} diff --git a/templates/nav_bar.html b/templates/nav_bar.html new file mode 100644 index 0000000000000000000000000000000000000000..73751579132d7a89fc804f7569f00264c3ec0d37 --- /dev/null +++ b/templates/nav_bar.html @@ -0,0 +1,68 @@ + diff --git a/templates/result_new.html b/templates/result_new.html new file mode 100644 index 0000000000000000000000000000000000000000..5304bf753f9b84100e23f64734b090708b55006f --- /dev/null +++ b/templates/result_new.html @@ -0,0 +1,30 @@ +{% extends 'base.html' %} {% block body %} {% include "nav_bar.html" %} + + +
+
+
+
+ Image +
+
+

{{input_file_name.capitalize()}}

+

{{crop_details}}

+
+
+
+
# NOTE(review): in the collapsed dump this line also carried the closing
# `{% endblock %}` of templates/result_new.html and the diff header that
# introduces utils.py; everything below is the content of utils.py.
"""Helpers for loading pickled model artifacts and reading data from MongoDB."""
import os
import pickle

import gridfs
from dotenv import load_dotenv
from pymongo import MongoClient

# Load environment variables from .env file
load_dotenv()


def _load_pickle(path):
    """Unpickle and return the single object stored in the file at *path*."""
    with open(path, 'rb') as f:
        return pickle.load(f)


def load_model_and_encoders(model_path, transformer_path, target_encoder_path):
    """Load the three pickled artifacts and return them as a tuple.

    Args:
        model_path: Path of the pickle file holding the model.
        transformer_path: Path of the pickle file holding the pipeline encoder.
        target_encoder_path: Path of the pickle file holding the label encoder.

    Returns:
        A ``(model, pipeline_encoder, label_encoder)`` tuple, in that order.

    Raises:
        OSError: If any of the files cannot be opened.
        pickle.UnpicklingError: If a file does not contain a valid pickle.
    """
    # SECURITY: pickle.load can execute arbitrary code while deserializing.
    # Only pass paths to artifacts produced by a trusted source.
    model = _load_pickle(model_path)
    pipeline_encoder = _load_pickle(transformer_path)
    label_encoder = _load_pickle(target_encoder_path)
    return model, pipeline_encoder, label_encoder


def retrieve_image_by_name_from_mongodb(file_name, database_name, collection_name):
    """Return the raw bytes of the GridFS file stored under *file_name*.

    The connection string is read from the ``MONGO_URL`` environment variable.

    Args:
        file_name: Value matched against the ``filename`` field of the file.
        database_name: Name of the MongoDB database to read from.
        collection_name: GridFS collection (bucket prefix) holding the file.

    Returns:
        The content returned by ``read()`` on the matching GridFS file.

    Raises:
        ValueError: If no file with that name is found.
        Exception: Any other error is printed and re-raised unchanged.
    """
    client = MongoClient(os.getenv("MONGO_URL"))
    try:
        # GridFS is MongoDB's specification for storing large binary objects.
        fs = gridfs.GridFS(client[database_name], collection=collection_name)

        # Find the image data using the filename in the metadata
        image_data = fs.find_one({"filename": file_name})
        if image_data is None:
            raise ValueError("image_data is None")

        # Read while the client is still open; the finally clause closes it.
        return image_data.read()
    except Exception as e:
        print(f"An error occurred: {e}")
        raise  # Re-raise the caught exception
    finally:
        # The client used to be left open here, leaking its connection pool
        # on every call; close it on success and on failure alike.
        client.close()


def retrieve_data(database_name, collection_name, search_query):
    """Return the ``data_info`` field of the first document matching a query.

    The connection string is read from the ``MONGO_URL`` environment variable.

    Args:
        database_name: Name of the MongoDB database to read from.
        collection_name: Name of the collection to search.
        search_query: Filter passed to ``find_one``.

    Returns:
        The value stored under the ``data_info`` key of the matched document.

    Raises:
        TypeError: If no document matches (``find_one`` then yields ``None``,
            which cannot be subscripted).
        KeyError: If the matched document has no ``data_info`` key.
    """
    client = MongoClient(os.getenv("MONGO_URL"))
    try:
        # Search for the document based on the provided query
        result = client[database_name][collection_name].find_one(search_query)
    finally:
        # Close even when the query itself raises.
        client.close()
    return result['data_info']