Spaces:

Sadashiv
/

c

Runtime error

File size: 3,622 Bytes

17c5137

import pandas as pd
from src.logger import logging
from src.exception import FertilizerException
from src.config import mongo_client
import os
import sys
import numpy as np
import yaml
import dill

def get_collection_as_dataframe(
    database_name: str, collection_name: str
) -> pd.DataFrame:
    """
    Description: Load every document of a MongoDB collection into a dataframe
    =========================================================
    Params:
    database_name: name of the database looked up on ``mongo_client``
    collection_name: name of the collection inside that database
    =========================================================
    return Pandas dataframe built from the collection's documents, with the
    "_id" column removed when it is present

    Raises FertilizerException wrapping any error raised while reading or
    converting the data.
    """
    try:
        logging.info(
            f"Reading data from database: {database_name} and collection: {collection_name}"
        )
        collection = mongo_client[database_name][collection_name]
        # Materialise the cursor so the frame is built from a plain list.
        documents = list(collection.find())
        frame = pd.DataFrame(documents)
        logging.info(f"{database_name} found in the mongodb")

        has_id_column = "_id" in frame.columns
        if has_id_column:
            logging.info("Dropping column: '_id'")
            frame = frame.drop(columns=["_id"])
        logging.info(f"Row and columns in df: {frame.shape}")
        return frame
    except Exception as e:
        raise FertilizerException(e, sys)


def seperate_dependant_column(df: pd.DataFrame, exclude_column: list) -> pd.DataFrame:
    """
    Return a new dataframe without the columns named in ``exclude_column``.

    df: source dataframe (not modified)
    exclude_column: column labels to remove
    return: dataframe holding the remaining columns
    """
    return df.drop(columns=exclude_column)


def get_column_indices(numerical_features: list, categorical_features: list, base_file_path: str):
    """
    Resolve feature names to their column positions in a CSV file.

    numerical_features: column names of the numerical features
    categorical_features: column names of the categorical features
    base_file_path: path of the CSV file whose header defines the column order
    return: tuple ``(numerical_feature_indices, categorical_feature_indices)``
            with the positions in the same order as the given names

    A feature name that is not a column of the file raises ``KeyError``.
    """
    # Only the header row is needed to locate columns, so skip parsing the data rows.
    header_columns = pd.read_csv(base_file_path, nrows=0).columns

    numerical_feature_indices = [header_columns.get_loc(feature) for feature in numerical_features]
    categorical_feature_indices = [header_columns.get_loc(feature) for feature in categorical_features]

    return numerical_feature_indices, categorical_feature_indices


def write_yaml_file(file_path, data: dict):
    """
    Write ``data`` to ``file_path`` as YAML, creating the parent directory first.

    file_path: location of the YAML file to write (overwritten if it exists)
    data: dictionary to serialise with ``yaml.dump``

    Raises FertilizerException wrapping any error raised while creating the
    directory or writing the file.
    """
    try:
        file_dir = os.path.dirname(file_path)
        # dirname is "" for a bare filename, and os.makedirs("") raises
        # FileNotFoundError, so only create a directory when there is one.
        if file_dir:
            os.makedirs(file_dir, exist_ok=True)

        with open(file_path, "w") as file_writer:
            yaml.dump(data, file_writer)
    except Exception as e:
        raise FertilizerException(e, sys)


def save_object(file_path: str, obj: object) -> None:
    """
    Serialise ``obj`` with dill and store it at ``file_path``.

    file_path: location of the file to write (parent directory is created)
    obj: object to serialise

    Raises FertilizerException wrapping any error raised while creating the
    directory, opening the file or dumping the object.
    """
    try:
        logging.info("Entered the save object method of utils")
        dir_path = os.path.dirname(file_path)
        # dirname is "" for a bare filename, and os.makedirs("") raises
        # FileNotFoundError, so only create a directory when there is one.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)
        with open(file_path, "wb") as file_obj:
            dill.dump(obj, file_obj)
        logging.info("Exited the save object method of utils")
    except Exception as e:
        raise FertilizerException(e, sys)


def load_object(file_path: str) -> object:
    """
    Load and return the dill-serialised object stored at ``file_path``.

    file_path: location of the file to read
    return: the deserialised object

    Raises FertilizerException when the file does not exist or cannot be
    deserialised.
    """
    try:
        if os.path.exists(file_path):
            # NOTE(review): dill.load can execute arbitrary code while
            # unpickling; only point this at files the pipeline itself produced.
            with open(file_path, "rb") as serialized_stream:
                return dill.load(serialized_stream)
        raise Exception(f"The file: {file_path} is not exists")
    except Exception as e:
        raise FertilizerException(e, sys)


def save_numpy_array_data(file_path: str, array: np.ndarray):
    """
    save numpy array data to file
    file_path : str location of the file to save
    array: np.ndarray data to save

    The parent directory of file_path is created when it does not exist.
    Raises FertilizerException wrapping any error raised while creating the
    directory or writing the file.
    """
    try:
        dir_path = os.path.dirname(file_path)
        # dirname is "" for a bare filename, and os.makedirs("") raises
        # FileNotFoundError, so only create a directory when there is one.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)

        # Pass an open handle rather than the path: np.save appends ".npy" to
        # path arguments lacking that suffix, which would change the file name.
        with open(file_path, "wb") as file_obj:
            np.save(file_obj, array)

    except Exception as e:
        raise FertilizerException(e, sys)


def load_numpy_array_data(file_path: str) -> np.ndarray:
    """
    load numpy array data from file
    file_path: str location of file to load
    return: np.ndarray data loaded

    Raises FertilizerException wrapping any error raised while opening or
    reading the file.
    """
    try:
        with open(file_path, "rb") as file_obj:
            # NOTE(review): allow_pickle=True lets np.load unpickle object
            # arrays, which can execute arbitrary code; only load files the
            # pipeline itself produced.
            return np.load(file_obj, allow_pickle=True)

    except Exception as e:
        # Use the exception type this module imports; the previous
        # CropException was never defined here and surfaced as a NameError.
        raise FertilizerException(e, sys)