bhuvaneshprasad committed on
Commit
e9f47fc
1 Parent(s): 1160f7b

Files uploaded

assets/model_prediction.png ADDED
assets/residual_plot.png ADDED
assets/test_data_forecast.png ADDED
assets/tsForecast.png ADDED
assets/tsHome.png ADDED
assets/tsResults.png ADDED
config/config.yaml ADDED
@@ -0,0 +1,11 @@
+ artifacts_root: artifacts
+
+ data_ingestion:
+   root_dir: artifacts/data_ingestion
+   source_url: https://drive.google.com/file/d/1kI7vnsPB46Z2grGq9GSaUCaQ3Y1ekI9a/view?usp=sharing
+   data_dir: artifacts/data_ingestion/
+   scaler_path: artifacts/data_preprocessing
+
+ model_training:
+   root_dir: artifacts/model_training
+   trained_model_path: artifacts/model_training/model.keras
params.yaml ADDED
@@ -0,0 +1,4 @@
+ EPOCHS: 300
+ BATCH_SIZE: 32
+ LEARNING_RATE: 0.001
+ TIME_STEPS: 60
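Both YAML files are consumed through the read_yaml helper defined in tsForecaster/utils/common.py below, which wraps the parsed content in a ConfigBox so nested keys can be read as attributes. A minimal sketch, assuming it is run from the repository root:

from tsForecaster.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from tsForecaster.utils.common import read_yaml

config = read_yaml(CONFIG_FILE_PATH)   # config/config.yaml
params = read_yaml(PARAMS_FILE_PATH)   # params.yaml

print(config.data_ingestion.root_dir)  # artifacts/data_ingestion
print(params.EPOCHS)                   # 300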
requirements.txt CHANGED
@@ -1,16 +1,16 @@
- tensorflow==2.17.0
- yfinance==0.2.40
- pandas==2.2.2
- numpy
- matplotlib==3.9.1
- seaborn==0.13.2
- python-box==7.2.0
- pyYAML==6.0.1
- tqdm==4.66.4
- ensure==1.0.4
- joblib==1.4.2
- types-PyYAML==6.0.12
- fastapi==0.111.0
- streamlit==1.36.0
- plotly==5.22.0
+ tensorflow==2.17.0
+ yfinance==0.2.40
+ pandas==2.2.2
+ numpy
+ matplotlib==3.9.1
+ seaborn==0.13.2
+ python-box==7.2.0
+ pyYAML==6.0.1
+ tqdm==4.66.4
+ ensure==1.0.4
+ joblib==1.4.2
+ types-PyYAML==6.0.12
+ fastapi==0.111.0
+ streamlit==1.36.0
+ plotly==5.22.0
  scikit-learn
tsForecaster/__init__.py ADDED
@@ -0,0 +1,21 @@
+ import os
+ import sys
+ import logging
+
+ logging_str = "[%(asctime)s: %(levelname)s: %(module)s: %(message)s]"
+
+ log_dir = "logs"
+ log_filepath = os.path.join(log_dir, "running_logs.log")
+ os.makedirs(log_dir, exist_ok=True)
+
+ logging.basicConfig(
+     level=logging.INFO,
+     format=logging_str,
+
+     handlers=[
+         logging.FileHandler(log_filepath),
+         logging.StreamHandler(sys.stdout)
+     ]
+ )
+
+ logger = logging.getLogger("tsForecasterLogger")
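Every other module in the package reuses this shared logger instead of configuring its own, for example:

from tsForecaster import logger

logger.info("written to logs/running_logs.log and echoed to stdout")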
tsForecaster/components/__init__.py ADDED
File without changes
tsForecaster/components/data_ingestion.py ADDED
@@ -0,0 +1,44 @@
+ import os
+ import opendatasets as od
+ import yfinance as yf
+ import pandas as pd
+ from tsForecaster.entity.config_entity import DataIngestionConfig
+ from tsForecaster import logger
+
+ class DataIngestion:
+     def __init__(self, config: DataIngestionConfig) -> None:
+         self.config = config
+
+     def download_file(self) -> None:
+         try:
+             download_dir = self.config.data_dir
+             dataset_url = self.config.source_url
+             os.makedirs(download_dir, exist_ok=True)
+             logger.info(f"Downloading data from {dataset_url} into directory {download_dir}")
+             od.download(dataset_url, data_dir=download_dir)
+             logger.info(f"Downloaded data from {dataset_url} into directory {download_dir}")
+         except Exception as e:
+             raise e
+
+     def update_file(self) -> None:
+         try:
+             download_dir = self.config.data_dir
+             ticker = yf.Ticker("^NSEI")
+             history = ticker.history(start='2024-07-08', interval='1d')
+             history.drop(columns=['Volume', 'Dividends', 'Stock Splits'], inplace=True)
+             history = round(history, 2)
+             history['Index Name'] = "NIFTY 50"
+             history.reset_index(inplace=True)
+             history['Date'] = history['Date'].dt.strftime('%d %b %Y')
+             history = history[['Index Name', 'Date', 'Open', 'High', 'Low', 'Close']]
+             history = history.sort_values(by=['Date'], ascending=False)
+             df = pd.read_csv(os.path.join(download_dir, "NIFTY 50_Historical.csv"))
+             df = pd.concat([history, df], ignore_index=True)
+             df = df[['Index Name', 'Date', 'Open', 'High', 'Low', 'Close']]
+             df['Date'] = pd.to_datetime(df['Date'])
+             df = df.drop_duplicates(subset=['Date'])
+             df.to_csv(os.path.join(download_dir, "NIFTY 50_Historical.csv"))
+
+         except Exception as e:
+             raise e
+
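The component can also be exercised on its own; a minimal sketch that fills the config dataclass by hand with the values from config/config.yaml (shown for illustration — the pipeline stage below builds it via ConfigurationManager instead):

from tsForecaster.components.data_ingestion import DataIngestion
from tsForecaster.entity.config_entity import DataIngestionConfig

cfg = DataIngestionConfig(
    root_dir="artifacts/data_ingestion",
    source_url="https://drive.google.com/file/d/1kI7vnsPB46Z2grGq9GSaUCaQ3Y1ekI9a/view?usp=sharing",
    data_dir="artifacts/data_ingestion/",
    scaler_path="artifacts/data_preprocessing",
)

ingestion = DataIngestion(config=cfg)
ingestion.download_file()   # fetches the historical CSV via opendatasets
ingestion.update_file()     # appends the latest NIFTY 50 rows via yfinance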
tsForecaster/components/data_preprocessing.py ADDED
@@ -0,0 +1,77 @@
+ import os
+ from pathlib import Path
+ import joblib
+ import numpy as np
+ import pandas as pd
+ from sklearn.preprocessing import MinMaxScaler
+ from tsForecaster.entity.config_entity import DataIngestionConfig
+ from tsForecaster import logger
+
+ class DataPreProcessing:
+     def __init__(self, config: DataIngestionConfig) -> None:
+         self.config = config
+
+     def process_csv(self) -> pd.DataFrame:
+         logger.info("Processing data from CSV to DataFrame")
+         df_path = Path(self.config.data_dir) / "NIFTY 50_Historical.csv"
+         df = pd.read_csv(df_path, index_col=[0])
+         df.drop(columns=['Index Name', 'Open', 'High', 'Low'], inplace=True)
+         df['Date'] = pd.to_datetime(df['Date'])
+
+         start_date = '1990-07-03'
+         end_date = pd.Timestamp.today().strftime('%Y-%m-%d')
+         date_range = pd.date_range(start=start_date, end=end_date, freq='D')
+         date_df = pd.DataFrame(date_range, columns=['Date'])
+         df = pd.merge(date_df, df, how='left', on='Date')
+         df = df.ffill()
+
+         df['Close%'] = ((df['Close'] / df['Close'].shift(1)) - 1) * 100
+         for period, days in {
+             '1D': 1, '2D': 2, '3D': 3, '1W': 7, '2W': 14, '1M': 30, '2M': 60,
+             '3M': 90, '6M': 180, '1Y': 365, '2Y': 730, '3Y': 1095, '5Y': 1825,
+             '7Y': 2555, '10Y': 3650
+         }.items():
+             df[f'Close_{period}_ago'] = df['Close%'].shift(days)
+
+         df.dropna(inplace=True)
+         df.reset_index(drop=True, inplace=True)
+         df.set_index('Date', inplace=True)
+
+         logger.info("Done processing data from CSV to DataFrame")
+         return df
+
+     def scaling_data(self, df: pd.DataFrame):
+         logger.info("Scaling data...")
+         scaler = MinMaxScaler()
+         df_scaled = pd.DataFrame(scaler.fit_transform(df), columns=df.columns, index=df.index)
+         os.makedirs(self.config.scaler_path, exist_ok=True)  # scaler directory is not created elsewhere
+         joblib.dump(scaler, Path(os.path.join(self.config.scaler_path, 'scaler.pkl')))
+         logger.info("Data scaling done")
+         return df_scaled
+
+     def create_sequences(self, df: pd.DataFrame, time_steps: int):
+         logger.info("Creating X, y sequences...")
+         X, y = [], []
+         for i in range(len(df) - time_steps):
+             X.append(df.iloc[i: i + time_steps, 1:].values)
+             y.append(df.iloc[i + time_steps, 0])  # target is the step after the lookback window, keeping y aligned with dates below
+         dates = df.index[time_steps:]
+         logger.info("X, y sequences created")
+         return np.array(X), np.array(y), dates
+
+     def train_test_split(self, X, y, dates, train_len=0.8, val_len=0.1, test_len=0.1):
+         logger.info("Splitting train, val, test data...")
+         total_len = train_len + val_len + test_len
+         if abs(total_len - 1.0) > 1e-9:  # tolerate floating-point rounding in the sum
+             logger.error("Error splitting train, val, test data: Total length is not equal to 1")
+             raise ValueError("Aggregate length of train, validation, and test lengths should be equal to 1")
+
+         total_size = len(y)
+         train_size = int(total_size * train_len)
+         val_size = int(total_size * val_len)
+
+         X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
+         y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]
+         dates_train, dates_val, dates_test = dates[:train_size], dates[train_size:train_size+val_size], dates[train_size+val_size:]
+
+         logger.info("Done splitting train, val, test data")
+         return X_train, X_val, X_test, y_train, y_val, y_test, dates_train, dates_val, dates_test
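A rough sketch of how these helpers chain together and what they hand to the model (shapes assume TIME_STEPS=60 and the 15 lag features engineered above; variable names are illustrative):

prep = DataPreProcessing(config=data_ingestion_config)   # config object from ConfigurationManager
df = prep.process_csv()                                   # Date-indexed frame: Close, Close%, 15 lag columns
features = df.drop(columns=['Close'])                     # Close% stays in column 0 as the prediction target
scaled = prep.scaling_data(features)
X, y, dates = prep.create_sequences(scaled, time_steps=60)
# X.shape == (n_samples, 60, 15), y.shape == (n_samples,), len(dates) == n_samples
splits = prep.train_test_split(X, y, dates)               # 80/10/10 split by default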
tsForecaster/components/model_evaluation.py ADDED
@@ -0,0 +1,31 @@
+ from pathlib import Path
+ from matplotlib import pyplot as plt
+ import tensorflow as tf
+ from tsForecaster.entity.config_entity import ModelTrainingConfig
+ from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
+
+
+ class ModelEvaluation:
+     def __init__(self, config: ModelTrainingConfig) -> None:
+         self.config = config
+
+     @staticmethod
+     def load_model(path: Path) -> tf.keras.Model:
+         return tf.keras.models.load_model(path)
+
+     def evaluation(self, X, y, dates):
+         self.model = self.load_model(self.config.training_model_path)
+         y_pred = self.model.predict(X).flatten()
+
+         plt.figure(figsize=(15, 5))
+         plt.plot(dates, y)
+         plt.plot(dates, y_pred)
+         plt.legend(['y-true', 'y-pred'])
+         plt.show()
+
+         mae = mean_absolute_error(y, y_pred)
+         mse = mean_squared_error(y, y_pred)
+         rmse = mean_squared_error(y, y_pred, squared=False)
+         r2 = r2_score(y, y_pred)
+
+         return mae, mse, rmse, r2
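One caveat: requirements.txt leaves scikit-learn unpinned, and the squared=False argument to mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6, so the RMSE line above may break on newer installs. A version-agnostic sketch of the same metrics, given y and y_pred as in evaluation():

import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

mae = mean_absolute_error(y, y_pred)
mse = mean_squared_error(y, y_pred)
rmse = float(np.sqrt(mse))   # avoids the deprecated squared=False keyword
r2 = r2_score(y, y_pred)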
tsForecaster/components/model_training.py ADDED
@@ -0,0 +1,46 @@
+ import tensorflow as tf
+ from pathlib import Path
+ from tsForecaster.entity.config_entity import ModelTrainingConfig
+ from tsForecaster.utils.common import create_directories
+
+
+ class ModelTraining:
+     def __init__(self, config: ModelTrainingConfig) -> None:
+         self.config = config
+         self.model = self.create_model()
+
+         create_directories([config.root_dir])
+
+     @staticmethod
+     def save_model(path: Path, model: tf.keras.Model):
+         model.save(path)
+
+     def create_model(self) -> tf.keras.Model:
+         model = tf.keras.models.Sequential([
+             tf.keras.layers.Input((self.config.params_time_steps, 15)),
+             tf.keras.layers.GRU(128, return_sequences=True),
+             tf.keras.layers.GRU(128, return_sequences=False),
+             tf.keras.layers.Dense(64, activation='relu'),
+             tf.keras.layers.Dense(64, activation='relu'),
+             tf.keras.layers.Dense(64, activation='relu'),
+             tf.keras.layers.Dense(1)
+         ])
+
+         model.compile(
+             loss='mse',
+             optimizer=tf.keras.optimizers.Adam(learning_rate=self.config.params_learning_rate),
+             metrics=['mean_squared_error']
+         )
+
+         model.summary()
+
+         return model
+
+     def train(self, X_train, y_train, X_val, y_val):
+         early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)
+         history = self.model.fit(
+             X_train,
+             y_train,
+             validation_data=(X_val, y_val),
+             epochs=self.config.params_epochs,
+             batch_size=self.config.params_batch_size,
+             callbacks=[early_stop]
+         )
+         self.save_model(path=self.config.training_model_path, model=self.model)
+         return history
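A quick smoke test of the architecture with random arrays, assuming TIME_STEPS=60 and the 15 features produced by preprocessing; the ModelTrainingConfig values here are placeholders for illustration, not the committed configuration:

import numpy as np
from pathlib import Path
from tsForecaster.components.model_training import ModelTraining
from tsForecaster.entity.config_entity import ModelTrainingConfig

cfg = ModelTrainingConfig(
    root_dir=Path("artifacts/model_training"),
    training_model_path=Path("artifacts/model_training/model.keras"),
    training_data_path=Path("artifacts/data_ingestion/"),
    params_time_steps=60,
    params_epochs=1,            # one epoch just to exercise the code path
    params_batch_size=32,
    params_learning_rate=0.001,
)

trainer = ModelTraining(config=cfg)                      # builds and compiles the GRU network
X_dummy = np.random.rand(128, 60, 15).astype("float32")
y_dummy = np.random.rand(128).astype("float32")
trainer.train(X_dummy[:96], y_dummy[:96], X_dummy[96:], y_dummy[96:])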
tsForecaster/config/__init__.py ADDED
File without changes
tsForecaster/config/configuration.py ADDED
@@ -0,0 +1,44 @@
+ import os
+ from pathlib import Path
+ from tsForecaster.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
+ from tsForecaster.utils.common import read_yaml, create_directories
+ from tsForecaster.entity.config_entity import DataIngestionConfig, ModelTrainingConfig
+
+ class ConfigurationManager:
+     def __init__(self, config_file_path=CONFIG_FILE_PATH, params_file_path=PARAMS_FILE_PATH) -> None:
+         self.config = read_yaml(config_file_path)
+         self.params = read_yaml(params_file_path)
+
+         create_directories([self.config.artifacts_root])
+
+     def get_data_ingestion_config(self) -> DataIngestionConfig:
+         config = self.config.data_ingestion
+
+         create_directories([config.root_dir])
+
+         data_ingestion_config = DataIngestionConfig(
+             root_dir=config.root_dir,
+             source_url=config.source_url,
+             data_dir=config.data_dir,
+             scaler_path=config.scaler_path
+         )
+
+         return data_ingestion_config
+
+     def get_model_training_config(self) -> ModelTrainingConfig:
+         model_training = self.config.model_training
+         params = self.params
+
+         create_directories([model_training.root_dir])
+
+         model_training_config = ModelTrainingConfig(
+             root_dir=Path(model_training.root_dir),
+             training_model_path=Path(model_training.trained_model_path),
+             training_data_path=Path(self.config.data_ingestion.data_dir),
+             params_epochs=params.EPOCHS,
+             params_batch_size=params.BATCH_SIZE,
+             params_learning_rate=params.LEARNING_RATE,
+             params_time_steps=params.TIME_STEPS
+         )
+
+         return model_training_config
tsForecaster/constants/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from pathlib import Path
+
+ CONFIG_FILE_PATH = Path("config/config.yaml")
+ PARAMS_FILE_PATH = Path("params.yaml")
tsForecaster/entity/__init__.py ADDED
File without changes
tsForecaster/entity/config_entity.py ADDED
@@ -0,0 +1,20 @@
+ from dataclasses import dataclass
+ from pathlib import Path
+
+ @dataclass(frozen=True)
+ class DataIngestionConfig:
+     root_dir: Path
+     source_url: str
+     data_dir: Path
+     scaler_path: Path
+
+ @dataclass(frozen=True)
+ class ModelTrainingConfig:
+     root_dir: Path
+     training_model_path: Path
+     training_data_path: Path
+     params_time_steps: int
+     params_epochs: int
+     params_batch_size: int
+     params_learning_rate: float
+
tsForecaster/pipeline/__init__.py ADDED
File without changes
tsForecaster/pipeline/stage_01_data_ingestion.py ADDED
@@ -0,0 +1,29 @@
+ from tsForecaster.components.data_ingestion import DataIngestion
+ from tsForecaster.config.configuration import ConfigurationManager
+ from tsForecaster import logger
+
+ STAGE_NAME = "Data Ingestion Stage"
+
+ class DataIngestionPipeline:
+     def __init__(self) -> None:
+         pass
+
+     def main(self):
+         try:
+             config = ConfigurationManager()
+             data_ingestion_config = config.get_data_ingestion_config()
+             data_ingestion = DataIngestion(config=data_ingestion_config)
+             data_ingestion.download_file()
+             data_ingestion.update_file()
+         except Exception as e:
+             raise e
+
+ if __name__ == '__main__':
+     try:
+         logger.info(f">>>>> stage {STAGE_NAME} started <<<<<")
+         obj = DataIngestionPipeline()
+         obj.main()
+         logger.info(f">>>>> stage {STAGE_NAME} completed <<<<<")
+     except Exception as e:
+         logger.exception(e)
+         raise e
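Each stage module is runnable on its own, e.g. python -m tsForecaster.pipeline.stage_01_data_ingestion, assuming it is launched from the repository root so the tsForecaster package and config/config.yaml resolve correctly; the later stages expect the splits produced by the preprocessing stage to be passed into main().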
tsForecaster/pipeline/stage_02_data_preprocessing.py ADDED
@@ -0,0 +1,36 @@
+ from tsForecaster.components.data_preprocessing import DataPreProcessing
+ from tsForecaster import logger
+ from tsForecaster.config.configuration import ConfigurationManager
+
+ STAGE_NAME = "Data Pre-Processing Stage"
+
+ class DataPreProcessingPipeline:
+     def __init__(self) -> None:
+         pass
+
+     def main(self):
+         try:
+             config = ConfigurationManager()
+             data_ingestion_config = config.get_data_ingestion_config()
+             data_preprocessing = DataPreProcessing(config=data_ingestion_config)
+             df = data_preprocessing.process_csv()
+             close_df = df['Close']
+             df.drop(columns=['Close'], inplace=True)
+             scaled_df = data_preprocessing.scaling_data(df)
+             time_steps = 60
+             X, y, dates = data_preprocessing.create_sequences(scaled_df, time_steps)
+             X_train, X_val, X_test, y_train, y_val, y_test, dates_train, dates_val, dates_test = data_preprocessing.train_test_split(X, y, dates, train_len=0.8, val_len=0.1, test_len=0.1)
+
+             return X_train, X_val, X_test, y_train, y_val, y_test, dates_train, dates_val, dates_test, close_df
+         except Exception as e:
+             raise e
+
+ if __name__ == '__main__':
+     try:
+         logger.info(f">>>>> stage {STAGE_NAME} started <<<<<")
+         obj = DataPreProcessingPipeline()
+         obj.main()
+         logger.info(f">>>>> stage {STAGE_NAME} completed <<<<<")
+     except Exception as e:
+         logger.exception(e)
+         raise e
tsForecaster/pipeline/stage_03_model_training.py ADDED
@@ -0,0 +1,28 @@
+ from tsForecaster.components.model_training import ModelTraining
+ from tsForecaster.config.configuration import ConfigurationManager
+ from tsForecaster import logger
+
+ STAGE_NAME = "Model Training"
+
+ class ModelTrainingPipeline:
+     def __init__(self) -> None:
+         pass
+
+     def main(self, X_train, y_train, X_val, y_val):
+         try:
+             config = ConfigurationManager()
+             model_training_config = config.get_model_training_config()
+             model_training = ModelTraining(config=model_training_config)
+             return model_training.train(X_train, y_train, X_val, y_val)
+         except Exception as e:
+             raise e
+
+ if __name__ == '__main__':
+     try:
+         logger.info(f">>>>> stage {STAGE_NAME} started <<<<<")
+         # main() needs the splits produced by the preprocessing stage
+         from tsForecaster.pipeline.stage_02_data_preprocessing import DataPreProcessingPipeline
+         X_train, X_val, X_test, y_train, y_val, y_test, *_ = DataPreProcessingPipeline().main()
+         obj = ModelTrainingPipeline()
+         history = obj.main(X_train, y_train, X_val, y_val)
+         logger.info(f">>>>> stage {STAGE_NAME} completed <<<<<")
+     except Exception as e:
+         logger.exception(e)
+         raise e
tsForecaster/pipeline/stage_04_model_evaluation.py ADDED
@@ -0,0 +1,30 @@
+ import os
+ from tsForecaster import logger
+ from tsForecaster.components.model_evaluation import ModelEvaluation
+ from tsForecaster.config.configuration import ConfigurationManager
+
+ STAGE_NAME = "Model Evaluation Stage"
+
+ class ModelEvaluationPipeline:
+     def __init__(self) -> None:
+         pass
+
+     def main(self, X, y, dates):
+         try:
+             config = ConfigurationManager()
+             eval_config = config.get_model_training_config()
+             evaluator = ModelEvaluation(eval_config)
+             evaluator.evaluation(X, y, dates)
+         except Exception as e:
+             raise e
+
+ if __name__ == '__main__':
+     try:
+         logger.info(f">>>>> stage {STAGE_NAME} started <<<<<")
+         # evaluate on the held-out test split from the preprocessing stage
+         from tsForecaster.pipeline.stage_02_data_preprocessing import DataPreProcessingPipeline
+         _, _, X_test, _, _, y_test, _, _, dates_test, _ = DataPreProcessingPipeline().main()
+         obj = ModelEvaluationPipeline()
+         obj.main(X_test, y_test, dates_test)
+         logger.info(f">>>>> stage {STAGE_NAME} completed <<<<<")
+     except Exception as e:
+         logger.exception(e)
+         raise e
tsForecaster/utils/__init__.py ADDED
File without changes
tsForecaster/utils/common.py ADDED
@@ -0,0 +1,191 @@
+ import os
+ from box.exceptions import BoxValueError
+ import yaml
+ from tsForecaster import logger
+ import json
+ import joblib
+ from ensure import ensure_annotations
+ from box import ConfigBox
+ from pathlib import Path
+ from typing import Any
+ import base64
+
+ @ensure_annotations
+ def read_yaml(path_to_yaml: Path) -> ConfigBox:
+     """
+     Read and load data from a YAML file.
+
+     Args:
+         path_to_yaml (Path): Path to the YAML file.
+
+     Returns:
+         ConfigBox: A ConfigBox object containing the YAML file data.
+
+     Raises:
+         ValueError: If the YAML file is empty or cannot be loaded.
+         Exception: For any other unexpected errors during file reading or parsing.
+     """
+     try:
+         with open(path_to_yaml) as yaml_file:
+             content = yaml.safe_load(yaml_file)
+             logger.info(f"yaml file {path_to_yaml} loaded successfully")
+             return ConfigBox(content)
+     except BoxValueError:
+         raise ValueError("Yaml file is empty")
+     except Exception as e:
+         raise e
+
+ @ensure_annotations
+ def create_directories(path_to_directories: list, verbose: bool = True):
+     """
+     Create directories specified in the list.
+
+     Args:
+         path_to_directories (list): List of directory paths to create.
+         verbose (bool, optional): Whether to log directory creation (default is True).
+
+     Raises:
+         Exception: For any unexpected errors during directory creation.
+     """
+     try:
+         for dirs in path_to_directories:
+             os.makedirs(dirs, exist_ok=True)
+             if verbose:
+                 logger.info(f"Created directory at: {dirs}")
+     except Exception as e:
+         raise e
+
+ @ensure_annotations
+ def save_json(path: Path, data: dict):
+     """
+     Save JSON data to a file.
+
+     Args:
+         path (Path): Path to the JSON file.
+         data (dict): Dictionary containing JSON serializable data.
+
+     Raises:
+         Exception: For any unexpected errors during file saving.
+     """
+     try:
+         with open(path, 'w') as f:
+             json.dump(data, f, indent=4)
+         logger.info(f"Json saved at: {path}")
+     except Exception as e:
+         raise e
+
+ @ensure_annotations
+ def load_json(path: Path) -> ConfigBox:
+     """
+     Load JSON data from a file and return as a ConfigBox object.
+
+     Args:
+         path (Path): Path to the JSON file.
+
+     Returns:
+         ConfigBox: A ConfigBox object containing the JSON data.
+
+     Raises:
+         Exception: For any unexpected errors during file loading.
+     """
+     try:
+         with open(path, 'r') as f:
+             content = json.load(f)
+         logger.info(f"Successfully loaded json from: {path}")
+         return ConfigBox(content)
+     except Exception as e:
+         raise e
+
+ @ensure_annotations
+ def save_bin(data: Any, path: Path) -> None:
+     """
+     Save binary data using joblib.
+
+     Args:
+         data (Any): Data to be saved.
+         path (Path): Path to save the binary file.
+
+     Raises:
+         Exception: For any unexpected errors during file saving.
+     """
+     try:
+         joblib.dump(value=data, filename=path)
+         logger.info(f"Binary file saved at: {path}")
+     except Exception as e:
+         raise e
+
+ @ensure_annotations
+ def load_bin(path: Path):
+     """
+     Load binary data using joblib.
+
+     Args:
+         path (Path): Path to the binary file.
+
+     Returns:
+         Any: Loaded binary data.
+
+     Raises:
+         Exception: For any unexpected errors during file loading.
+     """
+     data = joblib.load(filename=path)
+     logger.info(f"Successfully loaded binary from: {path}")
+     return data
+
+ @ensure_annotations
+ def get_size(path: Path) -> str:
+     """
+     Get the size of a file in kilobytes.
+
+     Args:
+         path (Path): Path to the file.
+
+     Returns:
+         str: Size of the file in kilobytes formatted as "{size} kb".
+
+     Raises:
+         Exception: For any unexpected errors during file size retrieval.
+     """
+     try:
+         size_in_kb = round(os.path.getsize(path)/1024, 2)
+         return f"{size_in_kb} kb"
+     except Exception as e:
+         raise e
+
+ def decodeImage(imgString, fileName) -> None:
+     """
+     Decode base64 encoded image data and save it to a file.
+
+     Args:
+         imgString (str): Base64 encoded image data.
+         fileName (str): Name of the file to save the decoded image data.
+
+     Raises:
+         Exception: For any unexpected errors during decoding or file writing.
+     """
+     try:
+         imgData = base64.b64decode(imgString)
+         with open(fileName, 'wb') as f:
+             f.write(imgData)
+     except Exception as e:
+         raise e
+
+ def encodeImage(imagePath) -> bytes:
+     """
+     Encode an image file to base64 bytes.
+
+     Args:
+         imagePath (str): Path to the image file.
+
+     Returns:
+         bytes: Base64 encoded bytes of the image data.
+
+     Raises:
+         Exception: For any unexpected errors during encoding or file reading.
+     """
+     try:
+         with open(imagePath, 'rb') as f:
+             imgData = f.read()
+         return base64.b64encode(imgData)
+     except Exception as e:
+         raise e