########################################################################
# This file does not need to be run here. It is the code used to train
# the CNN model for image classification. From this code, I got the
# model file that I used in the agent for disease classification.
########################################################################

# Import the warnings module, which allows control over warning messages
import warnings
# Suppress all warnings in the script
warnings.filterwarnings("ignore")

# === Standard Library Imports ===
import os  # For interacting with the operating system (e.g., file paths)

# === Data Handling and Manipulation ===
import pandas as pd  # For handling and manipulating datasets
import numpy as np   # For numerical operations

# === Data Visualization ===
import matplotlib.pyplot as plt  # For creating static visualizations
import seaborn as sns            # For enhanced data visualizations (e.g., heatmaps)

# === Machine Learning Utilities ===
from sklearn.model_selection import train_test_split                  # For splitting data into train/test sets
from sklearn.metrics import classification_report, confusion_matrix  # For evaluating classification performance

# === Deep Learning Libraries ===
import tensorflow as tf                         # TensorFlow for deep learning
from tensorflow.keras import layers, models    # Core TensorFlow Keras layers and model-building tools
from tensorflow.keras.models import Sequential  # For creating sequential models
from tensorflow.keras.preprocessing.image import ImageDataGenerator      # For image augmentation and loading
from tensorflow.keras.preprocessing.image import load_img, img_to_array  # For loading and converting images
from tensorflow import keras

# === CNN Model Components ===
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense  # Core CNN layers
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.layers import Dropout, BatchNormalization, GlobalAveragePooling2D  # Advanced layers for regularization and optimization

# === Pre-trained Model Import ===
from tensorflow.keras.applications import VGG16  # Importing the VGG16 model for transfer learning

# === Regularization ===
from tensorflow.keras import regularizers  # For adding regularization to the model

# Get the path of the images on Google Drive.
#### IMPORTANT: this Google Drive link needs to be adapted to be used here on Hugging Face.
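# A hedged sketch of one way to do that adaptation (my addition, not part of
# the original script): read the dataset location from an environment
# variable and fall back to the Colab path. "IMG_CLASSES_DIR" is a
# hypothetical variable name used purely for illustration; uncomment this
# and remove the hard-coded assignment below to use it.
#
# images_folder = os.environ.get(
#     "IMG_CLASSES_DIR",
#     "/content/drive/MyDrive/Colab Notebooks/Dataset/IMG_CLASSES/",
# )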
images_folder = "/content/drive/MyDrive/Colab Notebooks/Dataset/IMG_CLASSES/"

# Define the parameters used for training, validation and testing
batch_size = 32
img_height = 180
img_width = 180
seed = 123

# Training split (Keras requires `subset` whenever `validation_split` is set)
train_data = image_dataset_from_directory(
    images_folder,
    validation_split=0.2,
    subset="training",
    seed=seed,
    image_size=(img_height, img_width),
    batch_size=batch_size
)

# Validation data will be split again into validation and test sets
val_test_data = image_dataset_from_directory(
    images_folder,
    validation_split=0.2,
    subset="validation",
    seed=seed,
    image_size=(img_height, img_width),
    batch_size=batch_size
)

num_classes = len(train_data.class_names)
print("Classes:", train_data.class_names)

# Print the label mapped to each class name
for idx, name in enumerate(train_data.class_names):
    print(f"Label {idx}: {name}")

# Split val_test_data into val_data and test_data (50/50)
val_data = val_test_data.take(int(len(val_test_data) * 0.5))
test_data = val_test_data.skip(int(len(val_test_data) * 0.5))

# Print the cardinality of each split
print(f"Train batches: {tf.data.experimental.cardinality(train_data)}")
print(f"Val batches: {tf.data.experimental.cardinality(val_data)}")
print(f"Test batches: {tf.data.experimental.cardinality(test_data)}")

##### Visualization of the disease images ######
import matplotlib.image as mpimg

# Get the list of class names (folders)
class_names = train_data.class_names

# Number of images to display per class
num_images_per_class = 6

plt.figure(figsize=(15, 10))

# Loop through each class
for i, class_name in enumerate(class_names):
    class_folder = os.path.join(images_folder, class_name)
    image_files = [f for f in os.listdir(class_folder)
                   if os.path.isfile(os.path.join(class_folder, f))]

    # Display the first num_images_per_class images from the class
    for j in range(min(num_images_per_class, len(image_files))):
        img_path = os.path.join(class_folder, image_files[j])
        plt.subplot(len(class_names), num_images_per_class, i * num_images_per_class + j + 1)
        img = mpimg.imread(img_path)
        plt.imshow(img)
        plt.title(class_name)
        plt.axis('off')

plt.tight_layout()
plt.show()

# Optimize dataset performance: cache() keeps the decoded images in memory so
# TensorFlow does not reload and decode them from disk on every epoch, and
# prefetch() lets the input pipeline prepare the next batch while the current
# one is being processed by the GPU/TPU.
AUTOTUNE = tf.data.AUTOTUNE
train_data = train_data.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_data = val_data.cache().prefetch(buffer_size=AUTOTUNE)
test_data = test_data.cache().prefetch(buffer_size=AUTOTUNE)

# Normalize the images to pixel values in [0, 1]
normalization_layer = layers.Rescaling(1./255)
train_data = train_data.map(lambda x, y: (normalization_layer(x), y))
val_data = val_data.map(lambda x, y: (normalization_layer(x), y))
test_data = test_data.map(lambda x, y: (normalization_layer(x), y))

# CNN model
model = keras.Sequential([
    layers.Conv2D(32, 3, activation='relu', input_shape=(img_height, img_width, 3)),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

print('\n\n======================================= My own CNN model ===============================================')
model.summary()
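# Hedged sketch (my addition, not used in the original training run): the
# model trains for a fixed 10 epochs below. An optional EarlyStopping callback
# could stop training once validation loss stops improving; pass
# callbacks=[early_stop] to model.fit() to enable it.
early_stop = keras.callbacks.EarlyStopping(
    monitor="val_loss",         # watch the validation loss after each epoch
    patience=3,                 # stop after 3 epochs with no improvement
    restore_best_weights=True,  # keep the best weights seen during training
)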
# Train the model
model.fit(train_data, validation_data=val_data, epochs=10)

# Save the CNN model
model.save("model")

# Evaluation on the test set
test_loss, test_acc = model.evaluate(test_data)
print('Test accuracy:', test_acc)
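# Hedged sketch (my addition): classification_report, confusion_matrix and
# seaborn are imported at the top but never used. This is one way to apply
# them to the test split, assuming the pipeline above has run and `model`,
# `test_data` and `class_names` are still in scope. test_data is not
# shuffled, so iterating it twice yields the same order.
y_true = np.concatenate([y.numpy() for _, y in test_data])
y_pred = np.argmax(model.predict(test_data), axis=1)

print(classification_report(y_true, y_pred,
                            labels=list(range(num_classes)),
                            target_names=class_names))

cm = confusion_matrix(y_true, y_pred, labels=list(range(num_classes)))
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.title("Confusion matrix on the test set")
plt.show()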
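# Hedged sketch (my addition): the header notes that the saved model file is
# what the agent uses for disease classification. Loading it back and
# classifying a single image could look like this; "example.jpg" is a
# placeholder path, not a file from the dataset.
loaded = keras.models.load_model("model")
img = load_img("example.jpg", target_size=(img_height, img_width))
x = img_to_array(img) / 255.0   # same [0, 1] scaling as during training
x = np.expand_dims(x, axis=0)   # add the batch dimension
probs = loaded.predict(x)[0]
print("Predicted class:", class_names[int(np.argmax(probs))])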