Sadashiv's picture
Upload 31 files
625ed08 verified
raw
history blame contribute delete
No virus
3.17 kB
import pandas as pd
from src.logger import logging
from src.exception import CropException
from src.config import mongo_client
import os
import sys
import numpy as np
import yaml
import dill
def get_collection_as_dataframe(
database_name: str, collection_name: str
) -> pd.DataFrame:
"""
Description: This function return collection as dataframe
=========================================================
Params:
database_name: database name
collection_name: collection name
=========================================================
return Pandas dataframe of a collection
"""
try:
logging.info(
f"Reading data from database: {database_name} and collection: {collection_name}"
)
df = pd.DataFrame(list(mongo_client[database_name][collection_name].find()))
logging.info(f"{database_name} found in the mongodb")
if "_id" in df.columns:
logging.info("Dropping column: '_id'")
df = df.drop(columns=["_id"], axis=1)
logging.info(f"Row and columns in df: {df.shape}")
return df
except Exception as e:
raise CropException(e, sys)
def seperate_dependant_column(df: pd.DataFrame, exclude_column: list) -> pd.DataFrame:
final_dataframe = df.drop(exclude_column, axis=1)
return final_dataframe
def write_yaml_file(file_path, data: dict):
try:
file_dir = os.path.dirname(file_path)
os.makedirs(file_dir, exist_ok=True)
with open(file_path, "w") as file_writer:
yaml.dump(data, file_writer)
except Exception as e:
raise CropException(e, sys)
def save_object(file_path: str, obj: object) -> None:
try:
logging.info("Entered the save object method of utils")
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, "wb") as file_obj:
dill.dump(obj, file_obj)
logging.info("Exited the save object method of utils")
except Exception as e:
raise CropException(e, sys)
def load_object(file_path: str) -> object:
try:
if not os.path.exists(file_path):
raise Exception(f"The file: {file_path} is not exists")
with open(file_path, "rb") as file_obj:
return dill.load(file_obj)
except Exception as e:
raise CropException(e, sys)
def save_numpy_array_data(file_path: str, array: np.array):
"""
save numpy array data to file
file_path : str location of the file to save
array: np.array data to save
"""
try:
dir_path = os.path.dirname(file_path)
os.makedirs(dir_path, exist_ok=True)
with open(file_path, "wb") as file_ojb:
np.save(file_obj, array)
except Exception as e:
raise CropException(e, sys)
def load_numpy_array_data(file_path: str) -> np.array:
"""
load numpy array data from file
file_path: str location of file to load
return: np.array data loaded
"""
try:
with open(file_path, "rb") as file_obj:
return np.load(file_obj, allow_pickle=True)
except Exception as e:
raise CropException(e, sys)