Upload 31 files
Browse files- .gitattributes +1 -0
- crop-recommendation +0 -1
- crop-recommendation/.gitignore +168 -0
- crop-recommendation/.vscode/extensions.json +10 -0
- crop-recommendation/.vscode/settings.json +8 -0
- crop-recommendation/.vscode/tasks.json +15 -0
- crop-recommendation/LICENSE +21 -0
- crop-recommendation/README.md +50 -0
- crop-recommendation/data_download.py +40 -0
- crop-recommendation/main.py +8 -0
- crop-recommendation/notebooks/crop-recommendation-notebook.ipynb +0 -0
- crop-recommendation/requirements.txt +11 -0
- crop-recommendation/saved_models/0/model/model.pkl +3 -0
- crop-recommendation/saved_models/0/target_encoder/target_encoder.pkl +0 -0
- crop-recommendation/saved_models/0/transformer/transformer.pkl +0 -0
- crop-recommendation/src/__init__.py +0 -0
- crop-recommendation/src/components/__init__.py +0 -0
- crop-recommendation/src/components/data_ingestion.py +73 -0
- crop-recommendation/src/components/data_trasformation.py +113 -0
- crop-recommendation/src/components/data_validation.py +159 -0
- crop-recommendation/src/components/model_evaluation.py +123 -0
- crop-recommendation/src/components/model_pusher.py +69 -0
- crop-recommendation/src/components/model_trainer.py +107 -0
- crop-recommendation/src/config.py +20 -0
- crop-recommendation/src/entity/__init__.py +0 -0
- crop-recommendation/src/entity/artifact_entity.py +40 -0
- crop-recommendation/src/entity/config_entity.py +120 -0
- crop-recommendation/src/exception.py +21 -0
- crop-recommendation/src/logger.py +22 -0
- crop-recommendation/src/pipeline/__init__.py +0 -0
- crop-recommendation/src/pipeline/training_pipeline.py +95 -0
- crop-recommendation/src/predictor.py +100 -0
- crop-recommendation/src/utils.py +106 -0
.gitattributes
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
crop-recommendation/saved_models/0/model/model.pkl filter=lfs diff=lfs merge=lfs -text
|
crop-recommendation
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
Subproject commit ce875580acbce4044f62e37db955614203e1232b
|
|
|
|
crop-recommendation/.gitignore
ADDED
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# poetry
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
102 |
+
#poetry.lock
|
103 |
+
|
104 |
+
# pdm
|
105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
106 |
+
#pdm.lock
|
107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
108 |
+
# in version control.
|
109 |
+
# https://pdm.fming.dev/#use-with-ide
|
110 |
+
.pdm.toml
|
111 |
+
|
112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
113 |
+
__pypackages__/
|
114 |
+
|
115 |
+
# Celery stuff
|
116 |
+
celerybeat-schedule
|
117 |
+
celerybeat.pid
|
118 |
+
|
119 |
+
# SageMath parsed files
|
120 |
+
*.sage.py
|
121 |
+
|
122 |
+
# Environments
|
123 |
+
.env
|
124 |
+
.venv
|
125 |
+
env/
|
126 |
+
venv/
|
127 |
+
ENV/
|
128 |
+
env.bak/
|
129 |
+
venv.bak/
|
130 |
+
|
131 |
+
# Spyder project settings
|
132 |
+
.spyderproject
|
133 |
+
.spyproject
|
134 |
+
|
135 |
+
# Rope project settings
|
136 |
+
.ropeproject
|
137 |
+
|
138 |
+
# mkdocs documentation
|
139 |
+
/site
|
140 |
+
|
141 |
+
# mypy
|
142 |
+
.mypy_cache/
|
143 |
+
.dmypy.json
|
144 |
+
dmypy.json
|
145 |
+
|
146 |
+
# Pyre type checker
|
147 |
+
.pyre/
|
148 |
+
|
149 |
+
# pytype static type analyzer
|
150 |
+
.pytype/
|
151 |
+
|
152 |
+
# Cython debug symbols
|
153 |
+
cython_debug/
|
154 |
+
|
155 |
+
# PyCharm
|
156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
157 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
+
#.idea/
|
161 |
+
|
162 |
+
data_dump.py
|
163 |
+
demo.ipynb
|
164 |
+
kaggle.json
|
165 |
+
crop-recommendation-dataset
|
166 |
+
catboost_info
|
167 |
+
temp.py
|
168 |
+
artifact
|
crop-recommendation/.vscode/extensions.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"recommendations": [
|
3 |
+
"mongodb.mongodb-vscode",
|
4 |
+
"ms-python.python",
|
5 |
+
"ms-toolsai.jupyter",
|
6 |
+
"ms-toolsai.jupyter-keymap",
|
7 |
+
"ms-toolsai.jupyter-renderers",
|
8 |
+
"formulahendry.code-runner"
|
9 |
+
]
|
10 |
+
}
|
crop-recommendation/.vscode/settings.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"workbench.colorTheme": "Default Dark+",
|
3 |
+
"workbench.preferredDarkColorTheme": "Default Dark+",
|
4 |
+
"task.allowAutomaticTasks": "on",
|
5 |
+
"workbench.editorAssociations": {
|
6 |
+
"*.md": "vscode.markdown.preview.editor"
|
7 |
+
}
|
8 |
+
}
|
crop-recommendation/.vscode/tasks.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"version": "2.0.0",
|
3 |
+
"tasks": [
|
4 |
+
{
|
5 |
+
"label": "Installing extensions and dependencies...",
|
6 |
+
"type": "shell",
|
7 |
+
"command": "code-server --install-extension mongodb.mongodb-vscode --install-extension ms-python.python --install-extension formulahendry.code-runner && pip install -r requirements.txt",
|
8 |
+
"presentation": {
|
9 |
+
"reveal": "always",
|
10 |
+
"panel": "new"
|
11 |
+
},
|
12 |
+
"runOptions": { "runOn": "folderOpen" }
|
13 |
+
}
|
14 |
+
]
|
15 |
+
}
|
crop-recommendation/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 Sadashiv Nandanikar
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
crop-recommendation/README.md
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Crop Recommendation
|
2 |
+
|
3 |
+
#### Harnessing the capabilities of machine learning models, analyzes specific parameters to suggest the most suitable crops, optimizing yields and efficiency.
|
4 |
+
|
5 |
+
## Demo
|
6 |
+
### Input Interface
|
7 |
+
<img src="https://github.com/07Sada/crop-recommendation/assets/112761379/3f8c5f4d-1df4-4516-b428-f4b95a2cc5df" alt="Image 1" width="800">
|
8 |
+
|
9 |
+
### Output Interface
|
10 |
+
<img src="https://github.com/07Sada/crop-recommendation/assets/112761379/86a4aefd-b973-40ad-b79c-f2b1dd070d91" alt="Image 1" width="800">
|
11 |
+
|
12 |
+
## Data Source
|
13 |
+
This dataset contains information about the soil and environmental conditions that are ideal for growing different crops. The dataset includes the following columns:
|
14 |
+
|
15 |
+
- `N`: The ratio of nitrogen content in the soil.
|
16 |
+
- `P`: The ratio of phosphorus content in the soil.
|
17 |
+
- `K`: The ratio of potassium content in the soil.
|
18 |
+
- `Temperature`: The temperature in degrees Celsius.
|
19 |
+
- `Humidity`: The relative humidity in percent.
|
20 |
+
- `pH`: The pH value of the soil.
|
21 |
+
- `Rainfall`: The rainfall in millimeters.
|
22 |
+
|
23 |
+
[Link](https://www.kaggle.com/datasets/atharvaingle/crop-recommendation-dataset) for the dataset
|
24 |
+
|
25 |
+
<details>
|
26 |
+
<summary>Supported crops
|
27 |
+
</summary>
|
28 |
+
|
29 |
+
- Apple
|
30 |
+
- Blueberry
|
31 |
+
- Cherry
|
32 |
+
- Corn
|
33 |
+
- Grape
|
34 |
+
- Pepper
|
35 |
+
- Orange
|
36 |
+
- Peach
|
37 |
+
- Potato
|
38 |
+
- Soybean
|
39 |
+
- Strawberry
|
40 |
+
- Tomato
|
41 |
+
- Squash
|
42 |
+
- Raspberry
|
43 |
+
</details>
|
44 |
+
|
45 |
+
## Project Details
|
46 |
+
This is repository is submodule for [CropGaurd](https://github.com/07Sada/CropGaurd.git)
|
47 |
+
|
48 |
+
## Project PipeLine Stages
|
49 |
+
![Project PipeLine Stages](https://user-images.githubusercontent.com/112761379/225940480-2a7381b2-6abd-4c1c-8287-0fd49099be8c.jpg)
|
50 |
+
|
crop-recommendation/data_download.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import opendatasets as od
|
2 |
+
import os
|
3 |
+
import json
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
|
6 |
+
# Load variables from .env file
|
7 |
+
load_dotenv()
|
8 |
+
|
9 |
+
DATASET_URL = "https://www.kaggle.com/datasets/atharvaingle/crop-recommendation-dataset"
|
10 |
+
|
11 |
+
def create_kaggle_json_file():
|
12 |
+
# Fetch the username and API key from the .env file
|
13 |
+
username = os.getenv('username')
|
14 |
+
key = os.getenv('key')
|
15 |
+
|
16 |
+
kaggle_credentials = {
|
17 |
+
"username": username,
|
18 |
+
"key": key
|
19 |
+
}
|
20 |
+
|
21 |
+
# Path to the kaggle.json file
|
22 |
+
kaggle_file_path = os.path.join(os.getcwd(), 'kaggle.json')
|
23 |
+
|
24 |
+
# Write the dictionary to the .kaggle/kaggle.json file
|
25 |
+
with open(kaggle_file_path, 'w') as file:
|
26 |
+
json.dump(kaggle_credentials, file)
|
27 |
+
|
28 |
+
def remove_kaggle_json_file():
|
29 |
+
# Path to the kaggle.json file
|
30 |
+
kaggle_file_path = os.path.join(os.getcwd(), 'kaggle.json')
|
31 |
+
|
32 |
+
# Remove the kaggle.json file
|
33 |
+
os.remove(kaggle_file_path)
|
34 |
+
|
35 |
+
create_kaggle_json_file()
|
36 |
+
|
37 |
+
od.download(DATASET_URL)
|
38 |
+
|
39 |
+
# Remove the kaggle.json file after downloading the dataset
|
40 |
+
remove_kaggle_json_file()
|
crop-recommendation/main.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.pipeline.training_pipeline import start_training_pipeline
|
2 |
+
|
3 |
+
if __name__ =="__main__":
|
4 |
+
try:
|
5 |
+
start_training_pipeline()
|
6 |
+
|
7 |
+
except Exception as e:
|
8 |
+
print(e)
|
crop-recommendation/notebooks/crop-recommendation-notebook.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
crop-recommendation/requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pymongo
|
2 |
+
pandas
|
3 |
+
numpy
|
4 |
+
matplotlib
|
5 |
+
seaborn
|
6 |
+
scikit-learn
|
7 |
+
opendatasets
|
8 |
+
python-dotenv
|
9 |
+
ipykernel
|
10 |
+
PyYAML
|
11 |
+
dill
|
crop-recommendation/saved_models/0/model/model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61347ed5e6bbb2060eddc5a515c43e9d61aae5f6f1c7eaecb1f52b64f2df89a5
|
3 |
+
size 3676666
|
crop-recommendation/saved_models/0/target_encoder/target_encoder.pkl
ADDED
Binary file (499 Bytes). View file
|
|
crop-recommendation/saved_models/0/transformer/transformer.pkl
ADDED
Binary file (901 Bytes). View file
|
|
crop-recommendation/src/__init__.py
ADDED
File without changes
|
crop-recommendation/src/components/__init__.py
ADDED
File without changes
|
crop-recommendation/src/components/data_ingestion.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.entity import config_entity
|
2 |
+
from src.entity import artifact_entity
|
3 |
+
from src.exception import CropException
|
4 |
+
from src.logger import logging
|
5 |
+
from src import utils
|
6 |
+
|
7 |
+
from sklearn.model_selection import train_test_split
|
8 |
+
import numpy as np
|
9 |
+
import pandas as pd
|
10 |
+
import sys
|
11 |
+
import os
|
12 |
+
|
13 |
+
|
14 |
+
class DataIngestion:
|
15 |
+
def __init__(self, data_ingestion_config: config_entity.DataIngestionConfig):
|
16 |
+
try:
|
17 |
+
logging.info(f"{'>>'*20} Data Ingestion {'<<'*20}")
|
18 |
+
self.data_ingestion_config = data_ingestion_config
|
19 |
+
except Exception as e:
|
20 |
+
raise CropException(e, sys)
|
21 |
+
|
22 |
+
def initiate_data_ingestion(self) -> artifact_entity.DataIngestionArtifact:
|
23 |
+
try:
|
24 |
+
logging.info("Exporting collection data as pandas dataframe")
|
25 |
+
|
26 |
+
df: pd.DataFrame = utils.get_collection_as_dataframe(
|
27 |
+
database_name=self.data_ingestion_config.database_name,
|
28 |
+
collection_name=self.data_ingestion_config.collection_name,
|
29 |
+
)
|
30 |
+
|
31 |
+
logging.info("Saving data in feature store")
|
32 |
+
|
33 |
+
feature_store_dir = os.path.dirname(self.data_ingestion_config.feature_store_file_path)
|
34 |
+
os.makedirs(feature_store_dir, exist_ok=True)
|
35 |
+
|
36 |
+
logging.info("Saving dataframe into feature store")
|
37 |
+
df.to_csv(
|
38 |
+
path_or_buf=self.data_ingestion_config.feature_store_file_path,
|
39 |
+
index=False,
|
40 |
+
header=True,
|
41 |
+
)
|
42 |
+
|
43 |
+
logging.info("split dataset into train and test test")
|
44 |
+
train_df, test_df = train_test_split(
|
45 |
+
df, test_size=self.data_ingestion_config.test_size, random_state=42
|
46 |
+
)
|
47 |
+
|
48 |
+
logging.info("create dataset directory folder if not available")
|
49 |
+
dataset_dir = os.path.dirname(self.data_ingestion_config.train_file_path)
|
50 |
+
os.makedirs(dataset_dir, exist_ok=True)
|
51 |
+
|
52 |
+
logging.info("Save df to feature store folder")
|
53 |
+
train_df.to_csv(
|
54 |
+
path_or_buf=self.data_ingestion_config.train_file_path,
|
55 |
+
index=False,
|
56 |
+
header=True,
|
57 |
+
)
|
58 |
+
test_df.to_csv(
|
59 |
+
path_or_buf=self.data_ingestion_config.test_file_path,
|
60 |
+
index=False,
|
61 |
+
header=True,
|
62 |
+
)
|
63 |
+
|
64 |
+
data_ingestion_artifact = artifact_entity.DataIngestionArtifact(
|
65 |
+
feature_store_file_path=self.data_ingestion_config.feature_store_file_path,
|
66 |
+
train_file_path=self.data_ingestion_config.train_file_path,
|
67 |
+
test_file_path=self.data_ingestion_config.test_file_path,
|
68 |
+
)
|
69 |
+
logging.info(f"Data ingestion artifact: {data_ingestion_artifact}")
|
70 |
+
return data_ingestion_artifact
|
71 |
+
|
72 |
+
except Exception as e:
|
73 |
+
raise CropException(error_message=e, error_detail=sys)
|
crop-recommendation/src/components/data_trasformation.py
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.entity import artifact_entity
|
2 |
+
from src.entity import config_entity
|
3 |
+
from src.logger import logging
|
4 |
+
from src.exception import CropException
|
5 |
+
from src import utils
|
6 |
+
from src.config import TARGET_COLUMN
|
7 |
+
|
8 |
+
from typing import Optional
|
9 |
+
import os
|
10 |
+
import sys
|
11 |
+
|
12 |
+
from sklearn.pipeline import Pipeline
|
13 |
+
from sklearn.preprocessing import LabelEncoder
|
14 |
+
from sklearn.preprocessing import StandardScaler
|
15 |
+
import pandas as pd
|
16 |
+
import numpy as np
|
17 |
+
|
18 |
+
|
19 |
+
class DataTransformation:
|
20 |
+
def __init__(
|
21 |
+
self,
|
22 |
+
data_transformation_config: config_entity.DataTransformationConfig,
|
23 |
+
data_ingestion_artifact: artifact_entity.DataIngestionArtifact,
|
24 |
+
):
|
25 |
+
try:
|
26 |
+
logging.info(f"{'>'*20} Data Transformation Initiated {'<'*20}")
|
27 |
+
self.data_transformation_config = data_transformation_config
|
28 |
+
self.data_ingestion_artifact = data_ingestion_artifact
|
29 |
+
|
30 |
+
except Exception as e:
|
31 |
+
raise CropException(e, sys)
|
32 |
+
|
33 |
+
@classmethod
|
34 |
+
def get_data_tranformer_object(cls) -> Pipeline:
|
35 |
+
try:
|
36 |
+
standard_scaler = StandardScaler()
|
37 |
+
|
38 |
+
pipeline = Pipeline(steps=[("StandardScaler", standard_scaler)])
|
39 |
+
|
40 |
+
return pipeline
|
41 |
+
|
42 |
+
except Exception as e:
|
43 |
+
raise CropException(e, sys)
|
44 |
+
|
45 |
+
def initiate_data_transformation(
|
46 |
+
self,
|
47 |
+
) -> artifact_entity.DataTransformationArtifact:
|
48 |
+
try:
|
49 |
+
# reading training and testing file
|
50 |
+
train_df = pd.read_csv(self.data_ingestion_artifact.train_file_path)
|
51 |
+
test_df = pd.read_csv(self.data_ingestion_artifact.test_file_path)
|
52 |
+
|
53 |
+
# selecting input features for train and test dataframe
|
54 |
+
input_feature_train_df = train_df.drop(TARGET_COLUMN, axis=1)
|
55 |
+
input_feature_test_df = test_df.drop(TARGET_COLUMN, axis=1)
|
56 |
+
|
57 |
+
# selecting target feature for train and test dataframe
|
58 |
+
target_feature_train_df = train_df[TARGET_COLUMN]
|
59 |
+
target_feature_test_df = test_df[TARGET_COLUMN]
|
60 |
+
|
61 |
+
label_encoder = LabelEncoder()
|
62 |
+
label_encoder.fit(target_feature_train_df)
|
63 |
+
|
64 |
+
# transformation on target column
|
65 |
+
target_feature_train_arr = label_encoder.transform(target_feature_train_df)
|
66 |
+
target_feature_test_arr = label_encoder.transform(target_feature_test_df)
|
67 |
+
|
68 |
+
# transforming input features
|
69 |
+
transformation_pipeline = DataTransformation.get_data_tranformer_object()
|
70 |
+
transformation_pipeline.fit(input_feature_train_df)
|
71 |
+
|
72 |
+
input_feature_train_arr = transformation_pipeline.transform(
|
73 |
+
input_feature_train_df
|
74 |
+
)
|
75 |
+
input_feature_test_arr = transformation_pipeline.transform(
|
76 |
+
input_feature_test_df
|
77 |
+
)
|
78 |
+
|
79 |
+
train_arr = np.c_[input_feature_train_arr, target_feature_train_arr]
|
80 |
+
test_arr = np.c_[input_feature_test_arr, target_feature_test_arr]
|
81 |
+
|
82 |
+
# save the numpy array
|
83 |
+
utils.save_object(
|
84 |
+
file_path=self.data_transformation_config.transformed_train_path,
|
85 |
+
obj=train_arr,
|
86 |
+
)
|
87 |
+
utils.save_object(
|
88 |
+
file_path=self.data_transformation_config.transformed_test_path,
|
89 |
+
obj=test_arr,
|
90 |
+
)
|
91 |
+
|
92 |
+
utils.save_object(
|
93 |
+
file_path=self.data_transformation_config.transform_object_path,
|
94 |
+
obj=transformation_pipeline,
|
95 |
+
)
|
96 |
+
|
97 |
+
utils.save_object(
|
98 |
+
file_path=self.data_transformation_config.target_encoder_path,
|
99 |
+
obj=label_encoder,
|
100 |
+
)
|
101 |
+
|
102 |
+
data_transformation_artifact = artifact_entity.DataTransformationArtifact(
|
103 |
+
transform_object_path=self.data_transformation_config.transform_object_path,
|
104 |
+
transformed_train_path=self.data_transformation_config.transformed_train_path,
|
105 |
+
transformed_test_path=self.data_transformation_config.transformed_test_path,
|
106 |
+
target_encoder_path=self.data_transformation_config.target_encoder_path,
|
107 |
+
)
|
108 |
+
|
109 |
+
logging.info(f"Data transformation object : {data_transformation_artifact}")
|
110 |
+
return data_transformation_artifact
|
111 |
+
|
112 |
+
except Exception as e:
|
113 |
+
raise CropException(e, sys)
|
crop-recommendation/src/components/data_validation.py
ADDED
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.entity import artifact_entity
|
2 |
+
from src.entity import config_entity
|
3 |
+
from src.logger import logging
|
4 |
+
from src.exception import CropException
|
5 |
+
from src.config import TARGET_COLUMN
|
6 |
+
from src import utils
|
7 |
+
|
8 |
+
from typing import Optional
|
9 |
+
from scipy.stats import ks_2samp
|
10 |
+
import pandas as pd
|
11 |
+
import numpy as np
|
12 |
+
import sys
|
13 |
+
import os
|
14 |
+
|
15 |
+
|
16 |
+
class DataValidation:
|
17 |
+
def __init__(
|
18 |
+
self,
|
19 |
+
data_validation_config: config_entity.DataValidationConfig,
|
20 |
+
data_ingestion_artifact: artifact_entity.DataIngestionArtifact,
|
21 |
+
):
|
22 |
+
try:
|
23 |
+
logging.info(f"{'>'*20} Data Validation iniated {'<'*20}")
|
24 |
+
self.data_validation_config = data_validation_config
|
25 |
+
self.data_ingestion_artifact = data_ingestion_artifact
|
26 |
+
self.validation_error = dict()
|
27 |
+
except Exception as e:
|
28 |
+
raise CropException(e, sys)
|
29 |
+
|
30 |
+
def is_required_columns_exists(
|
31 |
+
self, base_df: pd.DataFrame, current_df: pd.DataFrame, report_key_name: str
|
32 |
+
) -> bool:
|
33 |
+
try:
|
34 |
+
base_columns = base_df.columns
|
35 |
+
current_columns = current_df.columns
|
36 |
+
|
37 |
+
missing_columns = []
|
38 |
+
for base_column in base_columns:
|
39 |
+
if base_column not in current_columns:
|
40 |
+
logging.info(f"Column: {base_column} is not available")
|
41 |
+
missing_columns.append(base_column)
|
42 |
+
|
43 |
+
if len(missing_columns) > 0:
|
44 |
+
self.validation_error[report_key_name] = missing_columns
|
45 |
+
return False
|
46 |
+
|
47 |
+
return True
|
48 |
+
|
49 |
+
except Exception as e:
|
50 |
+
raise CropException(e, sys)
|
51 |
+
|
52 |
+
def data_drift(
|
53 |
+
self, base_df: pd.DataFrame, current_df: pd.DataFrame, report_key_name: str
|
54 |
+
):
|
55 |
+
try:
|
56 |
+
drift_report = dict()
|
57 |
+
|
58 |
+
base_columns = base_df.columns
|
59 |
+
current_columns = current_df.columns
|
60 |
+
|
61 |
+
for base_column in base_columns:
|
62 |
+
base_data, current_data = base_df[base_column], current_df[base_column]
|
63 |
+
|
64 |
+
# Null hypothesis is that both columns data drawn from same distribution
|
65 |
+
|
66 |
+
logging.info(
|
67 |
+
f"Hypothesis {base_column} : {base_data.dtype}, {current_data.dtype}"
|
68 |
+
)
|
69 |
+
same_distribution = ks_2samp(base_data, current_data)
|
70 |
+
|
71 |
+
if same_distribution.pvalue > 0.05:
|
72 |
+
# we are accepting the null hypothesis
|
73 |
+
drift_report[base_column] = {
|
74 |
+
"pvalue": float(same_distribution.pvalue),
|
75 |
+
"same_distribution": True,
|
76 |
+
}
|
77 |
+
|
78 |
+
else:
|
79 |
+
drift_report[base_column] = {
|
80 |
+
"pvalue": float(same_distribution.pvalue),
|
81 |
+
"same_distribution": False,
|
82 |
+
}
|
83 |
+
|
84 |
+
self.validation_error[report_key_name] = drift_report
|
85 |
+
|
86 |
+
except Exception as e:
|
87 |
+
raise CropException(e, sys)
|
88 |
+
|
89 |
+
def initiate_data_validation(self) -> artifact_entity.DataValidationArtifact:
|
90 |
+
try:
|
91 |
+
logging.info(f"Reading base dataframe")
|
92 |
+
base_df = pd.read_csv(self.data_validation_config.base_file_path)
|
93 |
+
|
94 |
+
logging.info(f"Reading train dataframe")
|
95 |
+
train_df = pd.read_csv(self.data_ingestion_artifact.train_file_path)
|
96 |
+
|
97 |
+
logging.info(f"Reading test dataframe")
|
98 |
+
test_df = pd.read_csv(self.data_ingestion_artifact.test_file_path)
|
99 |
+
|
100 |
+
exclude_column = [TARGET_COLUMN]
|
101 |
+
base_df = utils.seperate_dependant_column(
|
102 |
+
df=base_df, exclude_column=exclude_column
|
103 |
+
)
|
104 |
+
train_df = utils.seperate_dependant_column(
|
105 |
+
df=train_df, exclude_column=exclude_column
|
106 |
+
)
|
107 |
+
test_df = utils.seperate_dependant_column(
|
108 |
+
df=test_df, exclude_column=exclude_column
|
109 |
+
)
|
110 |
+
|
111 |
+
logging.info(f"Is all required columns present in the train_df")
|
112 |
+
train_df_columns_status = self.is_required_columns_exists(
|
113 |
+
base_df=base_df,
|
114 |
+
current_df=train_df,
|
115 |
+
report_key_name="missing_columns_within_train_dataset",
|
116 |
+
)
|
117 |
+
|
118 |
+
test_df_columns_status = self.is_required_columns_exists(
|
119 |
+
base_df=base_df,
|
120 |
+
current_df=test_df,
|
121 |
+
report_key_name="missing_columns_within_test_dataset",
|
122 |
+
)
|
123 |
+
|
124 |
+
if train_df_columns_status:
|
125 |
+
logging.info(
|
126 |
+
f"As all column are available in train df hence detecting data drift"
|
127 |
+
)
|
128 |
+
self.data_drift(
|
129 |
+
base_df=base_df,
|
130 |
+
current_df=train_df,
|
131 |
+
report_key_name="data_drift_within_train_dataset",
|
132 |
+
)
|
133 |
+
|
134 |
+
if test_df_columns_status:
|
135 |
+
logging.info(
|
136 |
+
f"As all column are available in test df hence detecting data drift"
|
137 |
+
)
|
138 |
+
self.data_drift(
|
139 |
+
base_df=base_df,
|
140 |
+
current_df=test_df,
|
141 |
+
report_key_name="data_drift_within_test_dataset",
|
142 |
+
)
|
143 |
+
|
144 |
+
# writing the report
|
145 |
+
logging.info("Writing report in yaml format")
|
146 |
+
utils.write_yaml_file(
|
147 |
+
file_path=self.data_validation_config.report_file_path,
|
148 |
+
data=self.validation_error,
|
149 |
+
)
|
150 |
+
|
151 |
+
data_validation_artifact = artifact_entity.DataValidationArtifact(
|
152 |
+
report_file_path=self.data_validation_config.report_file_path
|
153 |
+
)
|
154 |
+
logging.info(f"Data validation artifact: {data_validation_artifact}")
|
155 |
+
|
156 |
+
return data_validation_artifact
|
157 |
+
|
158 |
+
except Exception as e:
|
159 |
+
raise CropException(e, sys)
|
crop-recommendation/src/components/model_evaluation.py
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.predictor import ModelResolver
|
2 |
+
from src.entity import config_entity
|
3 |
+
from src.entity import artifact_entity
|
4 |
+
from src.logger import logging
|
5 |
+
from src.exception import CropException
|
6 |
+
from src.config import TARGET_COLUMN
|
7 |
+
from src.utils import load_object
|
8 |
+
|
9 |
+
from sklearn.metrics import f1_score
|
10 |
+
import pandas as pd
|
11 |
+
import numpy as np
|
12 |
+
import os
|
13 |
+
import sys
|
14 |
+
|
15 |
+
|
16 |
+
class ModelEvaluation:
    """Decides whether the newly trained model should replace the model
    currently stored in the saved-model registry.

    The comparison metric is the weighted F1 score on the ingested test set.
    If no previously saved model exists, the new model is accepted outright.
    """

    def __init__(
        self,
        model_eval_config: config_entity.ModelEvaluationConfig,
        data_ingesiton_artifact: artifact_entity.DataIngestionArtifact,
        data_transformation_artifact: artifact_entity.DataTransformationArtifact,
        model_trainer_artifact: artifact_entity.ModelTrainerArtifact,
    ):
        try:
            logging.info(f"{'>'*20} Model Evaluation Initiated {'<'*20}")
            self.model_eval_config = model_eval_config
            # NOTE: the misspelled parameter name "data_ingesiton_artifact" is
            # kept because callers (training_pipeline) pass it by keyword.
            self.data_ingesiton_artifact = data_ingesiton_artifact
            self.data_transformation_artifact = data_transformation_artifact
            self.model_trainer_artifact = model_trainer_artifact
            self.model_resolver = ModelResolver()

        except Exception as e:
            raise CropException(e, sys)

    def initiate_model_evaluation(self) -> artifact_entity.ModelEvaluationArtifact:
        """Evaluate the newly trained model against the latest saved model.

        Returns:
            ModelEvaluationArtifact with ``is_model_accepted=True`` and the
            score improvement (or ``improved_accuracy=None`` when there is no
            previous model to compare against).

        Raises:
            CropException: on any failure, or when the new model does not
                beat the previously saved model.
        """
        try:
            logging.info(
                f"If the saved model directory contains a model, we will compare which model is best trained: \
                the model from the saved model folder or the new model."
            )

            latest_dir_path = self.model_resolver.get_latest_dir_path()
            if latest_dir_path is None:
                # No saved model yet: accept the freshly trained model.
                model_eval_artifact = artifact_entity.ModelEvaluationArtifact(
                    is_model_accepted=True, improved_accuracy=None
                )
                logging.info(f"Model evaluation artifact: {model_eval_artifact}")
                return model_eval_artifact

            # Locate the previously saved transformer, model and target encoder.
            logging.info(f"Finding location of transformer model and target encoder")
            transformer_path = self.model_resolver.get_latest_transformer_path()
            model_path = self.model_resolver.get_latest_model_path()
            target_encoder_path = self.model_resolver.get_latest_target_encoder_path()

            logging.info(
                f"Previous trained objects of transformer, model and target encoder"
            )
            # previously trained objects
            transformer = load_object(file_path=transformer_path)
            model = load_object(file_path=model_path)
            target_encoder = load_object(file_path=target_encoder_path)

            logging.info(f"Currently trained model objects")
            # currently trained model objects
            current_transformer = load_object(
                file_path=self.data_transformation_artifact.transform_object_path
            )
            current_model = load_object(
                file_path=self.model_trainer_artifact.model_path
            )
            current_target_encoder = load_object(
                file_path=self.data_transformation_artifact.target_encoder_path
            )

            test_df = pd.read_csv(self.data_ingesiton_artifact.test_file_path)
            target_df = test_df[TARGET_COLUMN]

            # Score of the PREVIOUS (saved) model.
            # BUG FIX: this section previously predicted with ``current_model``
            # and encoded labels with ``current_target_encoder``, so both
            # scores were computed from the new model and the comparison below
            # was a no-op. It now uses the previously saved model/encoder.
            input_feature_name = list(transformer.feature_names_in_)
            input_arr = transformer.transform(test_df[input_feature_name])

            y_pred = model.predict(input_arr)
            y_true = target_encoder.transform(target_df)

            previous_model_score = f1_score(
                y_true=y_true, y_pred=y_pred, average="weighted"
            )
            logging.info(
                f"Accuracy using previous trained model: {previous_model_score}"
            )

            # Score of the CURRENT (newly trained) model.
            input_feature_name = list(current_transformer.feature_names_in_)
            input_arr = current_transformer.transform(test_df[input_feature_name])

            y_pred = current_model.predict(input_arr)
            y_true = current_target_encoder.transform(target_df)

            current_model_score = f1_score(
                y_true=y_true, y_pred=y_pred, average="weighted"
            )

            logging.info(f"Accuracy using current trained model: {current_model_score}")

            if current_model_score <= previous_model_score:
                logging.info(f"Current trained model is not better than previous model")
                raise Exception("Current trained model is not better than previous model")

            model_eval_artifact = artifact_entity.ModelEvaluationArtifact(
                is_model_accepted=True,
                improved_accuracy=current_model_score - previous_model_score,
            )
            logging.info(f"Model Eval artifacts: {model_eval_artifact}")

            return model_eval_artifact

        except Exception as e:
            raise CropException(e, sys)
|
crop-recommendation/src/components/model_pusher.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.entity.config_entity import ModelPusherConfig
|
2 |
+
from src.entity import artifact_entity
|
3 |
+
from src.predictor import ModelResolver
|
4 |
+
from src.exception import CropException
|
5 |
+
from src.logger import logging
|
6 |
+
from src.utils import load_object, save_object
|
7 |
+
from src.entity.artifact_entity import (
|
8 |
+
DataTransformationArtifact,
|
9 |
+
ModelTrainerArtifact,
|
10 |
+
ModelPusherArtifact,
|
11 |
+
)
|
12 |
+
import sys
|
13 |
+
import os
|
14 |
+
|
15 |
+
|
16 |
+
class ModelPusher:
    """Publishes the accepted transformer, model and target encoder.

    Two copies are written: one into the run-local model-pusher artifact
    directory, and one into the next version folder of the shared
    saved-model registry.
    """

    def __init__(
        self,
        model_pusher_config: ModelPusherConfig,
        data_transformation_artifact: DataTransformationArtifact,
        model_trainer_artifact: ModelTrainerArtifact,
    ):
        try:
            logging.info(f"{'>'*20} Model Pusher Initiated {'<'*30}")
            self.model_pusher_config = model_pusher_config
            self.data_transformation_artifact = data_transformation_artifact
            self.model_trainer_artifact = model_trainer_artifact
            self.model_resolver = ModelResolver(
                model_registry=self.model_pusher_config.saved_model_dir
            )
        except Exception as e:
            raise CropException(e, sys)

    def initiate_model_pusher(self) -> ModelPusherArtifact:
        """Copy the trained objects to the pusher dir and the registry."""
        try:
            # Load the objects produced by the earlier pipeline stages.
            logging.info(f"Loading transformer model and target encoder")
            transformation = self.data_transformation_artifact
            transformer_obj = load_object(file_path=transformation.transform_object_path)
            model_obj = load_object(file_path=self.model_trainer_artifact.model_path)
            encoder_obj = load_object(file_path=transformation.target_encoder_path)

            # First copy: the run-local model-pusher artifact directory.
            logging.info(f"Saving model into model pusher directory")
            pusher_cfg = self.model_pusher_config
            save_object(file_path=pusher_cfg.pusher_transformer_path, obj=transformer_obj)
            save_object(file_path=pusher_cfg.pusher_model_path, obj=model_obj)
            save_object(file_path=pusher_cfg.pusher_target_encoder_path, obj=encoder_obj)

            # Second copy: the next version folder of the saved-model registry.
            logging.info(f"Saving model in saved model dir")

            registry_destinations = (
                (self.model_resolver.get_latest_save_transformer_path(), transformer_obj),
                (self.model_resolver.get_latest_save_model_path(), model_obj),
                (self.model_resolver.get_latest_save_target_encoder_path(), encoder_obj),
            )
            for destination, payload in registry_destinations:
                save_object(file_path=destination, obj=payload)

            model_pusher_artifact = ModelPusherArtifact(
                pusher_model_dir=pusher_cfg.pusher_model_dir,
                saved_model_dir=pusher_cfg.saved_model_dir,
            )
            logging.info(f"Model Pusher artifact: {model_pusher_artifact}")

            return model_pusher_artifact

        except Exception as e:
            raise CropException(e, sys)
|
crop-recommendation/src/components/model_trainer.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.entity import config_entity
|
2 |
+
from src.entity import artifact_entity
|
3 |
+
from src.logger import logging
|
4 |
+
from src.exception import CropException
|
5 |
+
from src import utils
|
6 |
+
|
7 |
+
from typing import Optional
|
8 |
+
from sklearn.metrics import f1_score
|
9 |
+
from sklearn.ensemble import RandomForestClassifier
|
10 |
+
import os
|
11 |
+
import sys
|
12 |
+
|
13 |
+
|
14 |
+
class ModelTrainer:
    """Trains a RandomForest classifier on the transformed arrays and
    validates it against the configured underfitting/overfitting thresholds
    before persisting it."""

    def __init__(
        self,
        model_trainer_config: config_entity.ModelTrainerConfig,
        data_transformation_artifact: artifact_entity.DataTransformationArtifact,
    ):
        try:
            logging.info(f"{'>'*30} Model Trainer Initiated {'<'*30}")
            self.model_trainer_config = model_trainer_config
            self.data_transformation_artifact = data_transformation_artifact

        except Exception as e:
            raise CropException(e, sys)

    def train_model(self, X, y):
        """Fit and return a RandomForestClassifier on (X, y)."""
        try:
            random_forest = RandomForestClassifier()
            random_forest.fit(X, y)

            return random_forest

        except Exception as e:
            raise CropException(e, sys)

    def initiate_model_trainer(self) -> artifact_entity.ModelTrainerArtifact:
        """Train, validate and persist the model.

        Returns:
            ModelTrainerArtifact with the saved model path and F1 scores.

        Raises:
            CropException: on failure, when the test F1 score is below the
                expected score, or when the train/test gap exceeds the
                overfitting threshold.
        """
        try:
            logging.info(f"Loading train and test array")
            train_arr = utils.load_numpy_array_data(
                file_path=self.data_transformation_artifact.transformed_train_path
            )
            test_arr = utils.load_numpy_array_data(
                file_path=self.data_transformation_artifact.transformed_test_path
            )

            logging.info(
                f"Splitting input and target feature from both train and test arr. "
            )
            # The last column of each array holds the encoded target label.
            X_train, y_train = train_arr[:, :-1], train_arr[:, -1]
            X_test, y_test = test_arr[:, :-1], test_arr[:, -1]

            logging.info(f"Training the model")
            model = self.train_model(X=X_train, y=y_train)

            # Fix: corrected log-message typo "scrore" -> "score".
            logging.info(f"Calculating f1 train score")
            yhat_train = model.predict(X_train)
            f1_train_score = f1_score(
                y_true=y_train, y_pred=yhat_train, average="weighted"
            )

            logging.info(f"Calculating f1 test score")
            yhat_test = model.predict(X_test)
            f1_test_score = f1_score(
                y_true=y_test, y_pred=yhat_test, average="weighted"
            )

            logging.info(
                f"train_score: {f1_train_score} and test score: {f1_test_score}"
            )

            # checking for overfitting or underfitting or expected score
            # Fix: corrected log-message typo "out model" -> "our model".
            logging.info(f"Checking if our model is underfitting or not")
            if f1_test_score < self.model_trainer_config.expected_score:
                raise Exception(
                    f"Model is not good as it is not able to give \
                    expected accuracy: {self.model_trainer_config.expected_score}, model actual score: {f1_test_score}"
                )

            logging.info(f"Checking if our model is overfitting or not")
            diff = abs(f1_train_score - f1_test_score)

            if diff > self.model_trainer_config.overfitting_threshold:
                raise Exception(
                    f"Train and test score diff: {diff} \
                    is more than overfitting threshold: {self.model_trainer_config.overfitting_threshold}"
                )

            # save the trained model
            logging.info(f"Saving model object")
            utils.save_object(file_path=self.model_trainer_config.model_path, obj=model)

            # prepare artifact
            logging.info(f"Prepare the artifact")
            # NOTE(review): "f2_test_score" mirrors the (likely misspelled)
            # field name on ModelTrainerArtifact; kept for compatibility.
            model_trainer_artifact = artifact_entity.ModelTrainerArtifact(
                model_path=self.model_trainer_config.model_path,
                f1_train_score=f1_train_score,
                f2_test_score=f1_test_score,
            )

            logging.info(f"Model trainer artifact: {model_trainer_artifact}")

            return model_trainer_artifact

        except Exception as e:
            raise CropException(e, sys)
|
crop-recommendation/src/config.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pymongo
import pandas as pd
import json
from dataclasses import dataclass
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so that
# os.getenv below can see MONGO_URL during local development.
load_dotenv()


@dataclass
class EnvironmentVariable:
    # MongoDB connection string read from the MONGO_URL environment variable.
    # NOTE(review): this is a class-level attribute evaluated once at import
    # time (no annotation), not a per-instance dataclass field.
    mongo_db_url = os.getenv("MONGO_URL")


# Single shared instance holding the resolved environment values.
env = EnvironmentVariable()

# Module-level MongoDB client shared across the project (imported by
# src.utils for all database reads).
mongo_client = pymongo.MongoClient(env.mongo_db_url)

# Name of the target/label column in the crop dataset.
TARGET_COLUMN = "label"
|
crop-recommendation/src/entity/__init__.py
ADDED
File without changes
|
crop-recommendation/src/entity/artifact_entity.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass


@dataclass
class DataIngestionArtifact:
    """Output paths produced by the data-ingestion stage."""

    feature_store_file_path: str  # full raw dataset dump (crop.csv)
    train_file_path: str  # train split CSV
    test_file_path: str  # test split CSV


@dataclass
class DataValidationArtifact:
    """Output of the data-validation stage."""

    report_file_path: str  # YAML validation report


@dataclass
class DataTransformationArtifact:
    """Output paths produced by the data-transformation stage."""

    transform_object_path: str  # pickled input-feature transformer
    transformed_train_path: str  # transformed train array (.npz)
    transformed_test_path: str  # transformed test array (.npz)
    target_encoder_path: str  # pickled label encoder for the target column


@dataclass
class ModelTrainerArtifact:
    """Output of the model-training stage."""

    model_path: str  # pickled trained model
    f1_train_score: float  # weighted F1 on the train split
    # NOTE(review): likely a typo for "f1_test_score" — kept unchanged
    # because ModelTrainer constructs this artifact with this keyword.
    f2_test_score: float


@dataclass
class ModelEvaluationArtifact:
    """Output of the model-evaluation stage."""

    is_model_accepted: bool  # True when the new model should be pushed
    improved_accuracy: float  # score delta vs previous model (None if none)


@dataclass
class ModelPusherArtifact:
    """Output of the model-pusher stage."""

    pusher_model_dir: str  # run-local copy of the pushed objects
    saved_model_dir: str  # shared versioned model registry root
|
crop-recommendation/src/entity/config_entity.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import sys
from src.exception import CropException
from src.logger import logging
from datetime import datetime

# Canonical file names shared by every pipeline stage.
FILE_NAME = "crop.csv"
TRAIN_FILE_NAME = "train.csv"
TEST_FILE_NAME = "test.csv"
TRANSFORMER_OBJECT_FILE_NAME = "transformer.pkl"
TARGET_ENCODER_OBJECT_FILE_NAME = "target_encoder.pkl"
MODEL_FILE_NAME = "model.pkl"


class TrainingPipelineConfig:
    """Root configuration: one timestamped artifact directory per run."""

    def __init__(self):
        try:
            # e.g. <cwd>/artifact/06152024__120000
            self.artifact_dir = os.path.join(
                os.getcwd(), "artifact", f"{datetime.now().strftime('%m%d%Y__%H%M%S')}"
            )
        except Exception as e:
            raise CropException(e, sys)


class DataIngestionConfig:
    """Settings for pulling the raw dataset from MongoDB and splitting it."""

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        try:
            self.database_name = "smartcropguard"
            self.collection_name = "crop"
            self.data_ingestion_dir = os.path.join(
                training_pipeline_config.artifact_dir, "data_ingestion"
            )
            # Raw dump of the whole collection.
            self.feature_store_file_path = os.path.join(
                self.data_ingestion_dir, "feature_store", FILE_NAME
            )
            self.train_file_path = os.path.join(
                self.data_ingestion_dir, "dataset", TRAIN_FILE_NAME
            )
            self.test_file_path = os.path.join(
                self.data_ingestion_dir, "dataset", TEST_FILE_NAME
            )
            # Fraction of rows held out for the test split.
            self.test_size = 0.2
        except Exception as e:
            raise CropException(e, sys)

    def to_dict(self) -> dict:
        """Expose the config as a plain dict (e.g. for logging/reporting)."""
        try:
            return self.__dict__
        except Exception as e:
            raise CropException(e, sys)


class DataValidationConfig:
    """Settings for drift/missing-value validation against a base dataset."""

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        self.data_validation_dir = os.path.join(
            training_pipeline_config.artifact_dir, "data_validation"
        )
        self.report_file_path = os.path.join(self.data_validation_dir, "report.yaml")
        # Columns with more than this fraction of missing values are dropped.
        self.missing_threshold = 0.2
        # Reference dataset the ingested data is validated against.
        self.base_file_path = os.path.join(
            "crop-recommendation-dataset/Crop_recommendation.csv"
        )


class DataTransformationConfig:
    """Output locations for the transformer, encoder and transformed arrays."""

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        self.data_transformation_dir = os.path.join(
            training_pipeline_config.artifact_dir, "data_transformation"
        )
        self.transform_object_path = os.path.join(
            self.data_transformation_dir,
            "transformer",
            TRANSFORMER_OBJECT_FILE_NAME
        )
        self.transformed_train_path = os.path.join(
            self.data_transformation_dir,
            "transformed",
            TRAIN_FILE_NAME.replace("csv", "npz"),
        )
        self.transformed_test_path = os.path.join(
            self.data_transformation_dir,
            "transformed",
            TEST_FILE_NAME.replace("csv", "npz"),
        )
        self.target_encoder_path = os.path.join(
            self.data_transformation_dir,
            "target_encoder",
            TARGET_ENCODER_OBJECT_FILE_NAME,
        )


class ModelTrainerConfig:
    """Model output path plus acceptance thresholds for the trainer."""

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        self.model_trainer_dir = os.path.join(
            training_pipeline_config.artifact_dir, "model_trainer"
        )
        self.model_path = os.path.join(self.model_trainer_dir, "model", MODEL_FILE_NAME)
        # Minimum acceptable weighted F1 on the test split.
        self.expected_score = 0.9
        # Maximum allowed |train F1 - test F1| gap.
        self.overfitting_threshold = 0.1


class ModelEvaluationConfig:
    """Settings for comparing the new model against the saved one."""

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        # NOTE(review): change_threshold is not referenced by ModelEvaluation
        # in this codebase — confirm whether it is dead configuration.
        self.change_threshold = 0.01


class ModelPusherConfig:
    """Destinations for publishing an accepted model."""

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        self.model_pusher_dir = os.path.join(
            training_pipeline_config.artifact_dir, "model_pusher"
        )
        # Shared, versioned registry root (outside the per-run artifact dir).
        self.saved_model_dir = os.path.join("saved_models")
        # Run-local copy of the pushed objects.
        self.pusher_model_dir = os.path.join(self.model_pusher_dir, "saved_models")
        self.pusher_model_path = os.path.join(self.pusher_model_dir, MODEL_FILE_NAME)
        self.pusher_transformer_path = os.path.join(
            self.pusher_model_dir, TRANSFORMER_OBJECT_FILE_NAME
        )
        self.pusher_target_encoder_path = os.path.join(
            self.pusher_model_dir, TARGET_ENCODER_OBJECT_FILE_NAME
        )
|
crop-recommendation/src/exception.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
|
3 |
+
|
4 |
+
def error_message_detail(error, error_detail: sys):
|
5 |
+
_, _, exc_tb = error_detail.exc_info()
|
6 |
+
file_name = exc_tb.tb_frame.f_code.co_filename
|
7 |
+
error_message = "Error occurred python script name [{0}] line number [{1}] error message [{2}]".format(
|
8 |
+
file_name, exc_tb.tb_lineno, str(error)
|
9 |
+
)
|
10 |
+
|
11 |
+
return error_message
|
12 |
+
|
13 |
+
|
14 |
+
class CropException(Exception):
|
15 |
+
def __init__(self, error_message, error_detail: sys):
|
16 |
+
self.error_message = error_message_detail(
|
17 |
+
error_message, error_detail=error_detail
|
18 |
+
)
|
19 |
+
|
20 |
+
def __str__(self):
|
21 |
+
return self.error_message
|
crop-recommendation/src/logger.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
import os
from datetime import datetime

# Log file name, unique per process start (month-day-year__hour-minute-second).
LOG_FILE_NAME = f"{datetime.now().strftime('%m%d%Y__%H%M%S')}.log"

# Directory that collects all log files, relative to the working directory.
LOG_FILE_DIR = os.path.join(os.getcwd(), "logs")

# create folder if not available
os.makedirs(LOG_FILE_DIR, exist_ok=True)

# Full path of this run's log file.
LOG_FILE_PATH = os.path.join(LOG_FILE_DIR, LOG_FILE_NAME)


# Configure the root logger once at import time; every module that does
# ``from src.logger import logging`` shares this file handler and format.
logging.basicConfig(
    filename=LOG_FILE_PATH,
    format="[ %(asctime)s ] %(filename)s - %(lineno)d %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
|
crop-recommendation/src/pipeline/__init__.py
ADDED
File without changes
|
crop-recommendation/src/pipeline/training_pipeline.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.logger import logging
|
2 |
+
from src.exception import CropException
|
3 |
+
from src.utils import get_collection_as_dataframe
|
4 |
+
from src.entity import config_entity
|
5 |
+
from src.entity import artifact_entity
|
6 |
+
import sys
|
7 |
+
from src.components.data_ingestion import DataIngestion
|
8 |
+
from src.components.data_validation import DataValidation
|
9 |
+
from src.components.data_trasformation import DataTransformation
|
10 |
+
from src.components.model_trainer import ModelTrainer
|
11 |
+
from src.components.model_evaluation import ModelEvaluation
|
12 |
+
from src.components.model_pusher import ModelPusher
|
13 |
+
|
14 |
+
|
15 |
+
def start_training_pipeline():
    """Run the full training pipeline end to end.

    Stages: ingestion -> validation -> transformation -> training ->
    evaluation -> pushing. Any failure is printed (the original best-effort
    behavior is preserved) rather than propagated.
    """
    try:
        training_pipeline_config = config_entity.TrainingPipelineConfig()

        # data ingestion
        data_ingestion_config = config_entity.DataIngestionConfig(
            training_pipeline_config=training_pipeline_config
        )
        data_ingestion_config.to_dict()

        data_ingestion = DataIngestion(data_ingestion_config=data_ingestion_config)
        data_ingestion_artifact = data_ingestion.initiate_data_ingestion()

        print(f"Data Ingestion complete")

        # data validation
        data_validation_config = config_entity.DataValidationConfig(
            training_pipeline_config=training_pipeline_config
        )

        data_validation = DataValidation(
            data_validation_config=data_validation_config,
            data_ingestion_artifact=data_ingestion_artifact,
        )

        data_validation.initiate_data_validation()
        print(f"Data Validation Complete")

        # data transformation
        data_transformation_config = config_entity.DataTransformationConfig(
            training_pipeline_config=training_pipeline_config
        )

        data_transformation = DataTransformation(
            data_transformation_config=data_transformation_config,
            data_ingestion_artifact=data_ingestion_artifact,
        )

        data_transformation_artifact = (
            data_transformation.initiate_data_transformation()
        )
        print(f"Data Transformation Complete")

        # model trainer
        model_trainer_config = config_entity.ModelTrainerConfig(
            training_pipeline_config=training_pipeline_config
        )

        model_trainer = ModelTrainer(
            model_trainer_config=model_trainer_config,
            data_transformation_artifact=data_transformation_artifact,
        )

        model_trainer_artifact = model_trainer.initiate_model_trainer()
        print(f"Model Training Complete")

        # model evaluation
        model_eval_config = config_entity.ModelEvaluationConfig(
            training_pipeline_config=training_pipeline_config
        )
        model_eval = ModelEvaluation(
            model_eval_config=model_eval_config,
            data_ingesiton_artifact=data_ingestion_artifact,
            data_transformation_artifact=data_transformation_artifact,
            model_trainer_artifact=model_trainer_artifact,
        )
        model_eval_artifact = model_eval.initiate_model_evaluation()
        print(f"Model Evaluation Complete")

        # Model Pusher
        model_pusher_config = config_entity.ModelPusherConfig(
            training_pipeline_config=training_pipeline_config
        )

        # BUG FIX: the data-transformation *config* object was previously
        # passed where ModelPusher expects the data-transformation *artifact*,
        # which would fail as soon as the pusher reads transform_object_path.
        model_pusher = ModelPusher(
            model_pusher_config=model_pusher_config,
            data_transformation_artifact=data_transformation_artifact,
            model_trainer_artifact=model_trainer_artifact,
        )

        model_pusher_artifact = model_pusher.initiate_model_pusher()
        print(f"Model Pusher Complete")

    except Exception as e:
        print(e)
|
crop-recommendation/src/predictor.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.entity.config_entity import TRANSFORMER_OBJECT_FILE_NAME
|
2 |
+
from src.entity.config_entity import MODEL_FILE_NAME
|
3 |
+
from src.entity.config_entity import TARGET_ENCODER_OBJECT_FILE_NAME
|
4 |
+
from src.exception import CropException
|
5 |
+
from src.logger import logging
|
6 |
+
|
7 |
+
import os
|
8 |
+
import sys
|
9 |
+
|
10 |
+
from glob import glob
|
11 |
+
from typing import Optional
|
12 |
+
|
13 |
+
|
14 |
+
class ModelResolver:
    """Resolves file-system paths inside the versioned model registry.

    The registry is a directory whose children are integer-named version
    folders (``0``, ``1``, ``2`` ...); the highest number is the latest.
    ``get_latest_*`` methods read the newest version; ``get_latest_save_*``
    methods return paths inside the NEXT version folder to write into.
    """

    def __init__(
        self,
        model_registry: str = "saved_models",
        transformer_dir_name="transformer",
        target_encoder_dir_name="target_encoder",
        model_dir_name="model",
    ):
        self.model_registry = model_registry
        os.makedirs(self.model_registry, exist_ok=True)

        self.transformer_dir_name = transformer_dir_name
        self.target_encoder_dir_name = target_encoder_dir_name
        self.model_dir_name = model_dir_name

    def get_latest_dir_path(self) -> Optional[str]:
        """Return the highest-numbered version dir, or None if the registry is empty."""
        try:
            dir_names = os.listdir(self.model_registry)
            if len(dir_names) == 0:
                return None
            latest_dir_name = max(map(int, dir_names))
            return os.path.join(self.model_registry, f"{latest_dir_name}")

        except Exception as e:
            raise CropException(e, sys)

    def get_latest_model_path(self):
        """Path of the latest saved model; raises if none exists."""
        try:
            latest_dir = self.get_latest_dir_path()
            if latest_dir is None:
                raise Exception(f"Model is not available")
            return os.path.join(latest_dir, self.model_dir_name, MODEL_FILE_NAME)
        except Exception as e:
            raise CropException(e, sys)

    def get_latest_transformer_path(self):
        """Path of the latest saved transformer; raises if none exists."""
        try:
            latest_dir = self.get_latest_dir_path()
            if latest_dir is None:
                # Fix: corrected misspelled error message ("availabel").
                raise Exception(f"Transformer is not available")
            return os.path.join(
                latest_dir, self.transformer_dir_name, TRANSFORMER_OBJECT_FILE_NAME
            )
        except Exception as e:
            raise CropException(e, sys)

    def get_latest_target_encoder_path(self):
        """Path of the latest saved target encoder; raises if none exists."""
        try:
            latest_dir = self.get_latest_dir_path()
            if latest_dir is None:
                raise Exception(f"Target encoder is not available")

            return os.path.join(
                latest_dir, self.target_encoder_dir_name, TARGET_ENCODER_OBJECT_FILE_NAME
            )

        except Exception as e:
            raise CropException(e, sys)

    def get_latest_save_dir_path(self):
        """Directory for the NEXT version (0 when the registry is empty)."""
        try:
            latest_dir = self.get_latest_dir_path()
            if latest_dir is None:
                return os.path.join(self.model_registry, f"{0}")
            # Fix: reuse latest_dir instead of scanning the registry a
            # second time via another get_latest_dir_path() call.
            latest_dir_num = int(os.path.basename(latest_dir))
            return os.path.join(self.model_registry, f"{latest_dir_num + 1}")
        except Exception as e:
            raise CropException(e, sys)

    def get_latest_save_model_path(self):
        """Destination path for the next model version."""
        try:
            latest_dir = self.get_latest_save_dir_path()
            return os.path.join(latest_dir, self.model_dir_name, MODEL_FILE_NAME)
        except Exception as e:
            raise CropException(e, sys)

    def get_latest_save_transformer_path(self):
        """Destination path for the next transformer version."""
        try:
            latest_dir = self.get_latest_save_dir_path()
            return os.path.join(
                latest_dir, self.transformer_dir_name, TRANSFORMER_OBJECT_FILE_NAME
            )
        except Exception as e:
            raise CropException(e, sys)

    def get_latest_save_target_encoder_path(self):
        """Destination path for the next target-encoder version."""
        try:
            latest_dir = self.get_latest_save_dir_path()
            return os.path.join(
                latest_dir, self.target_encoder_dir_name, TARGET_ENCODER_OBJECT_FILE_NAME
            )
        except Exception as e:
            raise CropException(e, sys)
|
crop-recommendation/src/utils.py
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from src.logger import logging
|
3 |
+
from src.exception import CropException
|
4 |
+
from src.config import mongo_client
|
5 |
+
import os
|
6 |
+
import sys
|
7 |
+
import numpy as np
|
8 |
+
import yaml
|
9 |
+
import dill
|
10 |
+
|
11 |
+
|
12 |
+
def get_collection_as_dataframe(
    database_name: str, collection_name: str
) -> pd.DataFrame:
    """Fetch every document of a MongoDB collection as a pandas DataFrame.

    The MongoDB-internal ``_id`` column is dropped when present.

    Args:
        database_name: name of the MongoDB database.
        collection_name: name of the collection inside that database.

    Returns:
        DataFrame with one row per document.
    """
    try:
        logging.info(
            f"Reading data from database: {database_name} and collection: {collection_name}"
        )
        documents = mongo_client[database_name][collection_name].find()
        df = pd.DataFrame(list(documents))
        logging.info(f"{database_name} found in the mongodb")

        if "_id" in df.columns:
            logging.info("Dropping column: '_id'")
            df = df.drop(columns=["_id"])
        logging.info(f"Row and columns in df: {df.shape}")
        return df
    except Exception as e:
        raise CropException(e, sys)
|
38 |
+
|
39 |
+
|
40 |
+
def seperate_dependant_column(df: pd.DataFrame, exclude_column: list) -> pd.DataFrame:
    """Return a copy of *df* without the columns listed in *exclude_column*."""
    remaining = df.drop(columns=exclude_column)
    return remaining
|
44 |
+
|
45 |
+
|
46 |
+
def write_yaml_file(file_path, data: dict):
    """Serialize *data* as YAML at *file_path*, creating parent directories."""
    try:
        parent_dir = os.path.dirname(file_path)
        os.makedirs(parent_dir, exist_ok=True)

        with open(file_path, "w") as handle:
            yaml.dump(data, handle)
    except Exception as e:
        raise CropException(e, sys)
|
55 |
+
|
56 |
+
|
57 |
+
def save_object(file_path: str, obj: object) -> None:
    """Serialize *obj* to *file_path* with dill, creating parent directories."""
    try:
        logging.info("Entered the save object method of utils")
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, "wb") as sink:
            dill.dump(obj, sink)
        logging.info("Exited the save object method of utils")
    except Exception as e:
        raise CropException(e, sys)
|
66 |
+
|
67 |
+
|
68 |
+
def load_object(file_path: str) -> object:
    """Load and return a dill-serialized object from *file_path*."""
    try:
        if not os.path.exists(file_path):
            raise Exception(f"The file: {file_path} is not exists")
        with open(file_path, "rb") as source:
            return dill.load(source)
    except Exception as e:
        raise CropException(e, sys)
|
76 |
+
|
77 |
+
|
78 |
+
def save_numpy_array_data(file_path: str, array: np.array):
    """Save a numpy array to *file_path*, creating parent directories.

    Args:
        file_path: destination file location.
        array: numpy array data to save.

    Raises:
        CropException: wrapping any I/O failure.
    """
    try:
        dir_path = os.path.dirname(file_path)
        os.makedirs(dir_path, exist_ok=True)

        # BUG FIX: the handle was bound as ``file_ojb`` (typo) while np.save
        # referenced ``file_obj``, raising NameError on every call.
        with open(file_path, "wb") as file_obj:
            np.save(file_obj, array)

    except Exception as e:
        raise CropException(e, sys)
|
93 |
+
|
94 |
+
|
95 |
+
def load_numpy_array_data(file_path: str) -> np.array:
    """Load and return a numpy array previously saved at *file_path*."""
    try:
        with open(file_path, "rb") as source:
            return np.load(source, allow_pickle=True)

    except Exception as e:
        raise CropException(e, sys)
|