########################################################################
# This file does not need to be run here. It is the code used to train
# the CNN model for image classification. From this code, I got the
# model file that I used in the agent for disease classification.
########################################################################

# Import the warnings module, which allows control over warning messages
import warnings
# Suppress all warnings in the script
warnings.filterwarnings("ignore")

# === Standard Library Imports ===
import os  # For interacting with the operating system (e.g., file paths)

# === Data Handling and Manipulation ===
import pandas as pd  # For handling and manipulating datasets
import numpy as np   # For numerical operations

# === Data Visualization ===
import matplotlib.pyplot as plt  # For creating static visualizations
import seaborn as sns            # For enhanced data visualizations (e.g., heatmaps)

# === Machine Learning Utilities ===
from sklearn.model_selection import train_test_split                  # For splitting data into train/test sets
from sklearn.metrics import classification_report, confusion_matrix  # For evaluating classification performance

# === Deep Learning Libraries ===
import tensorflow as tf                         # TensorFlow for deep learning
from tensorflow.keras import layers, models    # Core TensorFlow Keras layers and model-building tools
from tensorflow.keras.models import Sequential  # For creating sequential models
from tensorflow.keras.preprocessing.image import ImageDataGenerator      # For image augmentation and loading
from tensorflow.keras.preprocessing.image import load_img, img_to_array  # For loading and converting images
from tensorflow import keras

# === CNN Model Components ===
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense  # Core CNN layers
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.layers import Dropout, BatchNormalization, GlobalAveragePooling2D  # Advanced layers for regularization and optimization

# === Pre-trained Model Import ===
from tensorflow.keras.applications import VGG16  # Importing the VGG16 model for transfer learning

# === Regularization ===
from tensorflow.keras import regularizers  # For adding regularization to the model

# Get the path of the images on Google Drive.
#### IMPORTANT: this Google Drive link needs to be adapted to be used here on Hugging Face.
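# A hedged sketch of one way to do that adaptation (my addition, not part of
# the original script): read the dataset location from an environment
# variable and fall back to the Colab path. "IMG_CLASSES_DIR" is a
# hypothetical variable name used purely for illustration; uncomment this
# and remove the hard-coded assignment below to use it.
#
# images_folder = os.environ.get(
#     "IMG_CLASSES_DIR",
#     "/content/drive/MyDrive/Colab Notebooks/Dataset/IMG_CLASSES/",
# )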
images_folder = "/content/drive/MyDrive/Colab Notebooks/Dataset/IMG_CLASSES/"

# Define the parameters used for training, validation and testing
batch_size = 32
img_height = 180
img_width = 180
seed = 123

# Training split (Keras requires `subset` whenever `validation_split` is set)
train_data = image_dataset_from_directory(
    images_folder,
    validation_split=0.2,
    subset="training",
    seed=seed,
    image_size=(img_height, img_width),
    batch_size=batch_size
)

# Validation data will be split again into validation and test sets
val_test_data = image_dataset_from_directory(
    images_folder,
    validation_split=0.2,
    subset="validation",
    seed=seed,
    image_size=(img_height, img_width),
    batch_size=batch_size
)

num_classes = len(train_data.class_names)
print("Classes:", train_data.class_names)

# Print the label mapped to each class name
for idx, name in enumerate(train_data.class_names):
    print(f"Label {idx}: {name}")

# Split val_test_data into val_data and test_data (50/50)
val_data = val_test_data.take(int(len(val_test_data) * 0.5))
test_data = val_test_data.skip(int(len(val_test_data) * 0.5))

# Print the cardinality of each split
print(f"Train batches: {tf.data.experimental.cardinality(train_data)}")
print(f"Val batches: {tf.data.experimental.cardinality(val_data)}")
print(f"Test batches: {tf.data.experimental.cardinality(test_data)}")

##### Visualization of the disease images ######
import matplotlib.image as mpimg

# Get the list of class names (folders)
class_names = train_data.class_names

# Number of images to display per class
num_images_per_class = 6

plt.figure(figsize=(15, 10))

# Loop through each class
for i, class_name in enumerate(class_names):
    class_folder = os.path.join(images_folder, class_name)
    image_files = [f for f in os.listdir(class_folder)
                   if os.path.isfile(os.path.join(class_folder, f))]

    # Display the first num_images_per_class images from the class
    for j in range(min(num_images_per_class, len(image_files))):
        img_path = os.path.join(class_folder, image_files[j])
        plt.subplot(len(class_names), num_images_per_class, i * num_images_per_class + j + 1)
        img = mpimg.imread(img_path)
        plt.imshow(img)
        plt.title(class_name)
        plt.axis('off')

plt.tight_layout()
plt.show()

# Optimize dataset performance: cache() keeps the decoded images in memory so
# TensorFlow does not reload and decode them from disk on every epoch, and
# prefetch() lets the input pipeline prepare the next batch while the current
# one is being processed by the GPU/TPU.
AUTOTUNE = tf.data.AUTOTUNE
train_data = train_data.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_data = val_data.cache().prefetch(buffer_size=AUTOTUNE)
test_data = test_data.cache().prefetch(buffer_size=AUTOTUNE)

# Normalize the images to pixel values in [0, 1]
normalization_layer = layers.Rescaling(1./255)
train_data = train_data.map(lambda x, y: (normalization_layer(x), y))
val_data = val_data.map(lambda x, y: (normalization_layer(x), y))
test_data = test_data.map(lambda x, y: (normalization_layer(x), y))

# CNN model
model = keras.Sequential([
    layers.Conv2D(32, 3, activation='relu', input_shape=(img_height, img_width, 3)),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

print('\n\n======================================= My own CNN model ===============================================')
model.summary()
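# Hedged sketch (my addition, not used in the original training run): the
# model trains for a fixed 10 epochs below. An optional EarlyStopping callback
# could stop training once validation loss stops improving; pass
# callbacks=[early_stop] to model.fit() to enable it.
early_stop = keras.callbacks.EarlyStopping(
    monitor="val_loss",         # watch the validation loss after each epoch
    patience=3,                 # stop after 3 epochs with no improvement
    restore_best_weights=True,  # keep the best weights seen during training
)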
# Train the model
model.fit(train_data, validation_data=val_data, epochs=10)

# Save the CNN model
model.save("model")

# Evaluation on the test set
test_loss, test_acc = model.evaluate(test_data)
print('Test accuracy:', test_acc)
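# Hedged sketch (my addition): classification_report, confusion_matrix and
# seaborn are imported at the top but never used. This is one way to apply
# them to the test split, assuming the pipeline above has run and `model`,
# `test_data` and `class_names` are still in scope. test_data is not
# shuffled, so iterating it twice yields the same order.
y_true = np.concatenate([y.numpy() for _, y in test_data])
y_pred = np.argmax(model.predict(test_data), axis=1)

print(classification_report(y_true, y_pred,
                            labels=list(range(num_classes)),
                            target_names=class_names))

cm = confusion_matrix(y_true, y_pred, labels=list(range(num_classes)))
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.title("Confusion matrix on the test set")
plt.show()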
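# Hedged sketch (my addition): the header notes that the saved model file is
# what the agent uses for disease classification. Loading it back and
# classifying a single image could look like this; "example.jpg" is a
# placeholder path, not a file from the dataset.
loaded = keras.models.load_model("model")
img = load_img("example.jpg", target_size=(img_height, img_width))
x = img_to_array(img) / 255.0   # same [0, 1] scaling as during training
x = np.expand_dims(x, axis=0)   # add the batch dimension
probs = loaded.predict(x)[0]
print("Predicted class:", class_names[int(np.argmax(probs))])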