# TDCE Learning Model Basic Model Construction

This notebook displays the basic construction of the Time Driven Cost Estimation (TDCE) Learning Model step by step, without using the experiment script (`experiment_script.py`).

Import the library

In [None]:
import pandas as pd
import importlib
import sys
import os
import time
import numpy as np
import requests
import ipywidgets as widgets
from IPython.display import display
import git

If you run this notebook from an online source such as Google Colab, run the following cell to clone the model files.

In [None]:
run_from_online = True
ignore_download_dataset = False


if run_from_online:
 try:
 repo = git.Repo.clone_from('https://huggingface.co/iaecpsu-1/tdce-basic',
 './tdce-basic',
 branch='main')
 except git.exc.GitCommandError:
 print("Repository already exists or cannot be cloned. Continuing with existing files.")
 pass

 # fmt:off
 sys.path.append('./tdce-basic/model')
 sys.path.append('./tdce-basic/functions/matrix_generator')
 sys.path.append('./tdce-basic/functions')
 sys.path.append('./tdce-basic/functions/data_extractor')

 try:
 # For online execution, in Google Colab
 !pip install tensor-sensor
 except:
 pass
 
 import tdce_model as tdce
 import material_fc_layer as mfl
 import employee_fc_layer as efl
 import capital_fc_layer as cfl
 import loss
 import cost_matrix_class as cmc
 import display_input_variation as diva
 import viyacrab_augmentation as viya
 import adjust_data as ajd
 import result_display as rd
 import mini_plot as mp
 # fmt:on
 

else :
 # fmt:off
 sys.path.append('../model')
 sys.path.append('../functions/matrix_generator')
 sys.path.append('../functions')
 sys.path.append('../functions/data_extractor')
 
 import tdce_model as tdce
 import material_fc_layer as mfl
 import employee_fc_layer as efl
 import capital_fc_layer as cfl
 import loss
 import cost_matrix_class as cmc
 import display_input_variation as diva
 import viyacrab_augmentation as viya
 import adjust_data as ajd
 import result_display as rd
 import mini_plot as mp
 # fmt:on

# Reload the project modules so that edits made to their source files are
# picked up without restarting the kernel. Each module is reloaded exactly
# once; tdce comes after the layer and loss modules so that it is re-executed
# against their fresh versions (assumes tdce depends on them - TODO confirm).
for _module in (mfl, efl, cfl, loss, tdce, cmc, diva, viya, ajd, rd, mp):
    importlib.reload(_module)

## Dataset 
We use our project dataset for this experiment. We pick the [extended-random-dataset](https://huggingface.co/datasets/theethawats98/tdce-example-extended-random), a dataset with high dimension but moderate variation, as the case study for our demonstration. The data files are stored in the `datasets/extended-random` folder. The following cells create the folders if they do not exist and download the data files from our Hugging Face repository.

In [None]:
# Create every working folder independently. Using makedirs(exist_ok=True)
# means an already existing 'result' folder no longer prevents the dataset
# folders from being created.
for _folder in ('result', 'datasets', 'datasets/extended-random'):
    os.makedirs(_folder, exist_ok=True)

Download Files

In [None]:
def download_file():
    """Download the example dataset CSV files that are not yet on disk.

    The four files of the extended-random dataset are fetched from the
    Hugging Face dataset repository into ``datasets/extended-random``.
    Files that already exist locally are left untouched. A non-200 response
    is reported with ``print`` and the remaining files are still attempted.

    Returns:
        None.

    Raises:
        requests.RequestException: if a request fails at the network level
            or exceeds the timeout.
    """
    base_url = "https://huggingface.co/datasets/theethawats98/tdce-example-extended-random/resolve/main"
    target_folder = 'datasets/extended-random'
    file_names = (
        'generated_capital_cost.csv',
        'generated_employee_usage.csv',
        'generated_material_usage.csv',
        'generated_process_data.csv',
    )

    # Downloading the datasets
    print("Downloading datasets...")

    # Do not rely on another cell having created the folder.
    os.makedirs(target_folder, exist_ok=True)

    for file_name in file_names:
        path = f'{target_folder}/{file_name}'
        if os.path.exists(path):
            continue

        # A timeout keeps the notebook from hanging on a stalled connection.
        response = requests.get(f'{base_url}/{file_name}', timeout=60)
        if response.status_code == 200:
            with open(path, 'wb') as file:
                file.write(response.content)
            print(f'File {path} downloaded successfully')
        else:
            print(f'Failed to download file {path} (HTTP status {response.status_code})')

# Fetch the dataset unless the user opted out via ignore_download_dataset.
if not ignore_download_dataset:
    download_file()

## Model Setting
Select the correct setting for your model.

In [None]:
# The descriptions below are longer than the default ipywidgets label width
# and would be truncated; 'initial' lets each label take the width it needs.
_wide_description = {'description_width': 'initial'}

hour_day_employee_widget = widgets.BoundedIntText(
    value=8,
    min=1,
    max=24,
    step=1,
    description='Hours per Day for Employee:',
    disabled=False,
    style=_wide_description,
)

hour_day_capital_cost_widget = widgets.BoundedIntText(
    value=21,
    min=1,
    max=24,
    step=1,
    description='Hours per Day for Utility / Capital Cost:',
    disabled=False,
    style=_wide_description,
)

use_outlier_removal_widget = widgets.Checkbox(
    value=False,
    description='Enable Outlier Removal',
    disabled=False,
    indent=False,
)

# Passed (as a float) to the outlier removal step when it is enabled.
outlier_index_widget = widgets.Dropdown(
    options=['1', '1.5', '2'],
    value='1.5',
    description='Removal Indication Index:',
    disabled=False,
    style=_wide_description,
)

use_augmentation_widget = widgets.Checkbox(
    value=False,
    description='Enable Data Augmentation',
    disabled=False,
    indent=False,
)

use_early_stopping_widget = widgets.Checkbox(
    value=False,
    description='Enable Early Stopping',
    disabled=False,
    indent=False,
)

early_stopping_patience_widget = widgets.BoundedIntText(
    value=10,
    min=1,
    max=100,
    step=1,
    description='Early Stopping Patience Round:',
    disabled=False,
    style=_wide_description,
)

# Options are listed in ascending order so the dropdown is easy to scan.
element_level_lr_widget = widgets.Dropdown(
    options=['0.001', '0.01', '0.05', '0.1', '0.5'],
    value='0.01',
    description='Element Level Learning Rate:',
    disabled=False,
    style=_wide_description,
)

model_level_lr_widget = widgets.Dropdown(
    options=['0.0000001', '0.00000001', '0.000000001'],
    value='0.00000001',
    description='Model Level Learning Rate:',
    disabled=False,
    style=_wide_description,
)

epoch_widget = widgets.BoundedIntText(
    value=100,
    min=10,
    max=1000,
    step=10,
    description='Number of Epochs:',
    disabled=False,
    style=_wide_description,
)

# Render the controls in the order in which the settings are used below.
for _setting_widget in (
    hour_day_employee_widget,
    hour_day_capital_cost_widget,
    use_outlier_removal_widget,
    outlier_index_widget,
    use_augmentation_widget,
    use_early_stopping_widget,
    early_stopping_patience_widget,
    element_level_lr_widget,
    model_level_lr_widget,
    epoch_widget,
):
    display(_setting_widget)

If you change a value in the widgets, re-execute all of the cells below so that the model is created according to your settings.


Getting Value from Widget

In [None]:
# Snapshot the current widget state into plain variables.

# Working-hour settings
hour_day_employee = hour_day_employee_widget.value
hour_day_capital_cost = hour_day_capital_cost_widget.value

# Data preparation switches (dropdown values are strings, hence float())
use_outlier_removal = use_outlier_removal_widget.value
outlier_index = float(outlier_index_widget.value)
use_augmentation = use_augmentation_widget.value

# Learning rates
element_level_lr = float(element_level_lr_widget.value)
model_level_lr = float(model_level_lr_widget.value)

# Training length and early stopping
use_early_stopping = use_early_stopping_widget.value
early_stopping_patience = early_stopping_patience_widget.value
epoch_number = epoch_widget.value

Generated Cost Matrix

In [None]:
# Input data location, and a result folder named after the model-level
# learning rate of this run.
folder_path = 'datasets/extended-random'
output_folder_path = f'result/{model_level_lr}'

# Point the generator at the dataset folder and read the files.
cost_generator = cmc.CostMatrixGenerator()
cost_generator.change_data_directory(folder_path)
cost_generator.load_data()

# Build the input-variation table for the raw data and keep a CSV copy of it
# next to the dataset.
input_variation = diva.display_input_variation_by_directory(folder_path)
input_variation.to_csv(f"{folder_path}/data_variation.csv")

process_df, employee_usage, material_usage, capital_cost_usage = cost_generator.get_data()

If you want to see the raw data, you can display it using `process_df`, `process_df.head()`, `employee_usage`, and so on.

## Pre-Processing
Adjust and filter the input datasets (Material, Employee, Capital Cost) to match the output dataset (Process Dataset), depending on the outlier removal setting.

In [None]:
if not use_outlier_removal:
    # Nothing is filtered: show the variation of the data as loaded.
    display(input_variation)
else:
    # Drop outliers inside the generator, then read the filtered data back.
    cost_generator.remove_outlier_iqr(outlier_index)
    new_process_df, new_employee_usage, new_material_usage, new_capital_cost_usage = (
        cost_generator.get_data()
    )

    # Align the three input tables with the filtered process table.
    new_capital_cost_usage, new_employee_usage, new_material_usage = ajd.adjust_to_match_process(
        capital_cost_usage=new_capital_cost_usage,
        employee_usage=new_employee_usage,
        material_usage=new_material_usage,
        new_process_df=new_process_df,
    )

    # Recompute the variation table on the filtered data and export both the
    # table and the filtered process data.
    new_variation = diva.display_input_variation(
        new_process_df,
        new_material_usage,
        new_employee_usage,
        new_capital_cost_usage,
    )
    new_variation.to_csv(f"{folder_path}/data_variation_after_outlier.csv")
    new_process_df.to_csv(f"{folder_path}/process_df_after_outlier.csv")

    try:
        print("Data Variation After Outlier Removed")
        display(new_variation)
    except Exception as e:
        print("This is not Jupyter Notebook", e)

### Data Splitting
Split training and validation dataset

In [None]:
# Split the data held by the generator; 0.7 is the split ratio passed to it.
(
    train_process_df, train_employee_usage, train_material_usage, train_capital_cost,
    validate_process_df, validate_employee_usage, validate_material_usage, validate_capital_cost,
) = cost_generator.train_test_split_without_matrix(0.7)

# Payload for the validation part, handed to the training call later on.
validation_payload = cost_generator.get_validation_payload(validate_process_df)

# Variation of the training part, exported together with the training
# process table.
train_variation = diva.display_input_variation(
    train_process_df, train_material_usage, train_employee_usage, train_capital_cost
)
train_variation.to_csv(f"{folder_path}/train_data_variation.csv")
train_process_df.to_csv(f"{folder_path}/train_process_df.csv")

# Variation of the validation part.
validate_variation = diva.display_input_variation(
    validate_process_df, validate_material_usage, validate_employee_usage, validate_capital_cost
)
validate_variation.to_csv(f"{folder_path}/validate_data_variation.csv")
if use_augmentation:
    # TODO: Increase the Generalization of the Model
    # Keep a copy of the training process table as it was before augmentation.
    train_process_df.to_csv(
        f"{folder_path}/train_process_df_before_augmented.csv"
    )

    # Augment the imbalanced classes of the training data.
    train_process_df = viya.vy_training_augmentation(train_process_df)

    # Recompute and export the variation of the augmented training data.
    train_variation = diva.display_input_variation(
        train_process_df,
        train_material_usage,
        train_employee_usage,
        train_capital_cost,
    )
    train_variation.to_csv(
        f"{folder_path}/train_data_variation_after_augmented.csv"
    )

    # The file name used to interpolate the builtin `round`, producing
    # "..._<built-in function round>.csv"; this notebook has no round counter,
    # so a fixed name is used instead.
    train_process_df.to_csv(
        f"{folder_path}/train_process_df_after_augmented.csv"
    )
 
# Generate Matrix From Training Set
# Turn the training tables into the matrices consumed by the model.
# capital_cost_duration_matrix is the entry that was added for fine-tuning.
(
    material_cost_matrix, material_amount_matrix,
    employee_cost_matrix, employee_duration_matrix, employee_day_amount_matrix,
    capital_cost_matrix, day_amount_matrix, capital_cost_duration_matrix,
    result_matrix,
) = cost_generator.generate_data_from_input(
    train_process_df, train_material_usage, train_employee_usage, train_capital_cost
)

## Initial Model

### Initial Layer
Create a function that initializes the layers from the cost matrices. It automatically determines the input size each layer needs from the data.

In [None]:
def inital_layer(
    material_cost_matrix,
    employee_cost_matrix,
    capital_cost_matrix,
    hour_day_employee=8,
    hour_day_capital_cost=21,
):
    """Create one fully connected layer per cost element.

    The input size of each layer is the size of the last axis of the
    corresponding cost matrix; every layer has an output size of 1.

    Args:
        material_cost_matrix: 3-D array-like; its last axis sets the input
            size of the material layer.
        employee_cost_matrix: 3-D array-like; its last axis sets the input
            size of the employee layer.
        capital_cost_matrix: 3-D array-like; its last axis sets the input
            size of the capital cost layer.
        hour_day_employee: third argument of ``EmployeeFCLayer``. Defaults to
            8, the value that used to be hard-coded (presumably working hours
            per day for employees - confirm against the layer class).
        hour_day_capital_cost: third argument of ``CapitalCostFCLayer``.
            Defaults to 21, the value that used to be hard-coded (presumably
            operating hours per day for utilities / capital cost - confirm
            against the layer class).

    Returns:
        Tuple ``(material_layer, employee_layer, capital_cost_layer)``.

    Raises:
        ValueError: if a matrix does not have exactly three dimensions.
    """
    # Material FC Layer
    _, _, material_size = material_cost_matrix.shape
    material_layer_1 = mfl.MaterialFCLayer(material_size, 1)

    # Employee FC Layer
    _, _, employee_size = employee_cost_matrix.shape
    employee_layer_1 = efl.EmployeeFCLayer(employee_size, 1, hour_day_employee)

    # Capital Cost FC Layer
    _, _, capital_size = capital_cost_matrix.shape
    capital_cost_layer1 = cfl.CapitalCostFCLayer(
        capital_size, 1, hour_day_capital_cost
    )

    return (
        material_layer_1,
        employee_layer_1,
        capital_cost_layer1,
    )


### Model Initialization
Create the model object from its class

In [None]:
# Initial Model
# Start from an empty model object.
tdce_model = tdce.TDCEModel()

# Build one layer per cost element, sized from the training matrices.
material_layer_1, employee_layer_1, capital_cost_layer1 = inital_layer(
    material_cost_matrix=material_cost_matrix,
    employee_cost_matrix=employee_cost_matrix,
    capital_cost_matrix=capital_cost_matrix,
)

# Register the three layers with the model.
tdce_model.inital_inside_element(
    material_layer=material_layer_1,
    employee_layer=employee_layer_1,
    capital_cost_layer=capital_cost_layer1,
)

# Choose the loss, its derivative and the percentage error metric.
tdce_model.use(
    loss=loss.mse,
    loss_prime=loss.mse_prime,
    loss_percent=loss.rmspe,
)

# Optional early stopping with the patience selected in the widget.
if use_early_stopping:
    tdce_model.activate_early_stopping()
    tdce_model.edit_patience_round(early_stopping_patience)

Setting the learning rate

In [None]:
# Use the same learning rate for all element level
tdce_model.set_learning_rate(
 element_level_lr,element_level_lr,element_level_lr
)

# activate model weight
tdce_model.activete_model_weight()

## Training

Fit a model with input and output data

In [None]:
# Wall-clock timing around the training call.
start_time = time.time()

tdce_model.fit_with_validation(
    # Training schedule
    epoch=epoch_number,
    learning_rate=model_level_lr,
    # Material inputs
    material_cost_matrix=material_cost_matrix,
    material_amount_matrix=material_amount_matrix,
    # Employee inputs
    employee_cost_matrix=employee_cost_matrix,
    employee_duration_matrix=employee_duration_matrix,
    employee_day_amount_matrix=employee_day_amount_matrix,
    # Capital cost inputs
    capital_cost_matrix=capital_cost_matrix,
    capital_cost_duration_matrix=capital_cost_duration_matrix,
    day_amount_matrix=day_amount_matrix,
    # Targets and validation data
    result_matrix=result_matrix,
    validation_payload=validation_payload,
)

end_time = time.time()
time_usage = end_time - start_time

print(f"Learning Rate: {model_level_lr} / {element_level_lr}")
print(f"Time Using {time_usage} Second")


Get the Error Listing

In [None]:
# Create the result folder for this run. makedirs also creates a missing
# parent ('result') and is a no-op when the folder is already there.
os.makedirs(output_folder_path, exist_ok=True)

In [None]:
# Collect the per-epoch and per-sample error history from the model.
error_list = tdce_model.get_epoch_error()
sample_error_list = tdce_model.get_sample_error()
error_df = pd.DataFrame(error_list)
sample_error_df = pd.DataFrame(sample_error_list)

# Best (lowest) value of each error column over all epochs.
minimum_error = error_df["error"].min()
minimum_percent_error = error_df["error_percent"].min()
minimum_validate_error = error_df["validate_error"].min()
minimum_validate_percent_error = error_df["validate_error_percent"].min()

# Export the error tables; file names carry the epoch count and the
# element-level learning rate of this run.
error_df.to_csv(f"{output_folder_path}/{epoch_number}-{element_level_lr}.csv")
sample_error_df.to_csv(
    f"{output_folder_path}/error-list-{epoch_number}-{element_level_lr}.csv"
)

# Export all the sample / history of all training
sample_payload = tdce_model.get_sample_payload()
sample_payload_df = pd.DataFrame(sample_payload)
sample_payload_df.to_csv(
    f"{output_folder_path}/sample-payload-list-{epoch_number}-{element_level_lr}.csv",
    index=False,
)

# Both percentage errors come from loss.rmspe, so both labels read RMSPE
# (the second one used to be misspelled "RMSEP").
print(f"Minimum Error: {minimum_error}"
      f" / Minimum Percent Error (RMSPE): {minimum_percent_error}"
      f" / Minimum Validate Error: {minimum_validate_error}"
      f" / Minimum Validate Percent Error (RMSPE): {minimum_validate_percent_error}")


## Visualization

### Error Behavior
Display the model training behavior

In [None]:
# Learning curve from the per-epoch error table, labelled with both rates.
mp.plotting_learning_curve(
    epoch_error=error_df,
    element_learning_rate=element_level_lr,
    model_learning_rate=model_level_lr,
)

### Weight Adjustment Behavior
Display the Weight of Each Model Element

In [None]:
# Pick up any edits to the plotting module before drawing.
importlib.reload(mp)
mp.plot_model_level_weight(
    adjustment_data=sample_payload_df,
    epoch_error=error_df,
)

Display the weight of each material, labor, and utility cost object.

In [None]:
# Group the weight columns of the sample payload by their name prefix.
material_columns = [
    name for name in sample_payload_df.columns
    if name.startswith('material_weight_')
]
employee_columns = [
    name for name in sample_payload_df.columns
    if name.startswith('employee_weight_')
]
capital_columns = [
    name for name in sample_payload_df.columns
    if name.startswith('capital_cost_weight_')
]

# Pick up any edits to the plotting module before drawing.
importlib.reload(mp)
mp.plot_element_level_weight(
    adjustment_data=sample_payload_df,
    material_columns=material_columns,
    labor_columns=employee_columns,
    utility_columns=capital_columns,
)

## Export Model
Export the model so that it can be kept and reused elsewhere.

In [None]:
# Write the trained model to "tdce_model.pkl" in the current working directory.
# NOTE(review): the .pkl extension suggests pickle serialization - if so, only
# load such files from trusted sources; confirm against TDCEModel.export_model.
tdce_model.export_model("tdce_model.pkl")

© 2025, Intelligent Automation Engineering Center, Prince of Songkla University