c
File size: 3,622 Bytes
17c5137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import pandas as pd
from src.logger import logging
from src.exception import FertilizerException
from src.config import mongo_client
import os
import sys
import numpy as np
import yaml
import dill

def get_collection_as_dataframe(
    database_name: str, collection_name: str
) -> pd.DataFrame:
    """
    Description: Load every document of a collection into a dataframe
    =========================================================
    Params:
    database_name: name of the database to look up on ``mongo_client``
    collection_name: name of the collection inside that database
    =========================================================
    return Pandas dataframe built from the documents returned by ``find()``,
    without the '_id' column when that column is present

    Raises FertilizerException wrapping any error raised along the way.
    """
    try:
        logging.info(
            f"Reading data from database: {database_name} and collection: {collection_name}"
        )
        collection = mongo_client[database_name][collection_name]
        # Materialise the cursor first so the frame is built from a plain list.
        documents = list(collection.find())
        frame = pd.DataFrame(documents)
        logging.info(f"{database_name} found in the mongodb")

        has_id_column = "_id" in frame.columns
        if has_id_column:
            logging.info("Dropping column: '_id'")
            frame = frame.drop(columns=["_id"])
        logging.info(f"Row and columns in df: {frame.shape}")
        return frame
    except Exception as err:
        raise FertilizerException(err, sys)


def seperate_dependant_column(df: pd.DataFrame, exclude_column: list) -> pd.DataFrame:
    """
    Return a new dataframe without the given columns.

    df: source dataframe (not modified)
    exclude_column: column label(s) to drop from ``df``
    return: dataframe holding every column of ``df`` except ``exclude_column``
    """
    return df.drop(columns=exclude_column)


def get_column_indices(numerical_features: list, categorical_features: list, base_file_path: str):
    """
    Look up the positions of feature columns in a CSV file.

    numerical_features: column names whose positions go in the first result
    categorical_features: column names whose positions go in the second result
    base_file_path: path of the CSV file read with ``pd.read_csv``
    return: tuple (numerical positions, categorical positions), each a list
            in the same order as the corresponding input list
    """
    header = pd.read_csv(base_file_path).columns

    def _positions(features):
        # Index.get_loc maps a column label to its location in the header.
        return [header.get_loc(name) for name in features]

    return _positions(numerical_features), _positions(categorical_features)


def write_yaml_file(file_path, data: dict):
    """
    Dump ``data`` as YAML into ``file_path``.

    file_path: location of the YAML file to write; missing parent
               directories are created first
    data: dictionary passed to ``yaml.dump``

    Raises FertilizerException wrapping any error raised while creating the
    directory or writing the file.
    """
    try:
        file_dir = os.path.dirname(file_path)
        # A bare file name has an empty dirname, and os.makedirs("") raises
        # FileNotFoundError, so only create the directory when there is one.
        if file_dir:
            os.makedirs(file_dir, exist_ok=True)

        with open(file_path, "w") as file_writer:
            yaml.dump(data, file_writer)
    except Exception as e:
        raise FertilizerException(e, sys)


def save_object(file_path: str, obj: object) -> None:
    """
    Serialize ``obj`` with dill and write it to ``file_path``.

    file_path: location of the file to write; missing parent directories
               are created first
    obj: object handed to ``dill.dump``

    Raises FertilizerException wrapping any error raised while creating the
    directory or writing the file.
    """
    try:
        logging.info("Entered the save object method of utils")
        dir_path = os.path.dirname(file_path)
        # A bare file name has an empty dirname, and os.makedirs("") raises
        # FileNotFoundError, so only create the directory when there is one.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)
        with open(file_path, "wb") as file_obj:
            dill.dump(obj, file_obj)
        logging.info("Exited the save object method of utils")
    except Exception as e:
        raise FertilizerException(e, sys)


def load_object(file_path: str) -> object:
    """
    Read a dill-serialized object back from ``file_path``.

    file_path: location of the file to read
    return: whatever ``dill.load`` reconstructs from the file

    Raises FertilizerException when the file does not exist or cannot be
    loaded.

    NOTE(review): dill, like pickle, can execute arbitrary code while
    deserializing, so this should only be pointed at trusted files.
    """
    try:
        if os.path.exists(file_path):
            with open(file_path, "rb") as source:
                return dill.load(source)
        # Reached only when the path is missing; wrapped by the handler below.
        raise Exception(f"The file: {file_path} is not exists")
    except Exception as err:
        raise FertilizerException(err, sys)


def save_numpy_array_data(file_path: str, array: np.ndarray):
    """
    save numpy array data to file
    file_path : str location of the file to save; missing parent
                directories are created first
    array: np.ndarray data to save (written with ``np.save``)

    Raises FertilizerException wrapping any error raised while creating the
    directory or writing the file.
    """
    try:
        dir_path = os.path.dirname(file_path)
        # A bare file name has an empty dirname, and os.makedirs("") raises
        # FileNotFoundError, so only create the directory when there is one.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)

        # The handle name must match the one passed to np.save; the previous
        # mismatch (file_ojb vs file_obj) made every call fail with NameError.
        with open(file_path, "wb") as file_obj:
            np.save(file_obj, array)

    except Exception as e:
        raise FertilizerException(e, sys)


def load_numpy_array_data(file_path: str) -> np.ndarray:
    """
    load numpy array data from file
    file_path: str location of file to load
    return: np.ndarray data loaded

    Raises FertilizerException wrapping any error raised while opening or
    reading the file.

    NOTE(review): allow_pickle=True lets np.load unpickle object arrays,
    which can execute arbitrary code; only load trusted files.
    """
    try:
        with open(file_path, "rb") as file_obj:
            return np.load(file_obj, allow_pickle=True)

    except Exception as e:
        # Use the exception type this module imports; the previous
        # CropException name is not defined here and raised NameError.
        raise FertilizerException(e, sys)