#!/usr/bin/env python
# coding: utf-8

# In[1]:

from fastai.data.all import *
from fastai.vision.all import *
import cv2
from pathlib import Path
import pandas as pd

# Path to the CSV file describing images from all three databases combined
path_csv_combined = Path('D:\\Documents\\Machine Learning - Glaucoma\\combined_csv.csv')

# Path to the directory holding the combined images
path_image_combined = Path('D:\\Documents\\Machine Learning - Glaucoma\\combined images')

# Load the CSV file into a pandas DataFrame
combined_df = pd.read_csv(path_csv_combined)

# In[2]:

from sklearn.model_selection import train_test_split

# Hold out 15% of the data for testing, then 15% of the remainder for validation,
# stratifying on the label so every split keeps the original class proportions.
train_df, test_df = train_test_split(combined_df, test_size=0.15, random_state=42,
                                     stratify=combined_df['label'])
train_df, val_df = train_test_split(train_df, test_size=0.15, random_state=42,
                                    stratify=train_df['label'])

# Display the sizes of the datasets
print(f"Training set size: {len(train_df)} samples")
print(f"Validation set size: {len(val_df)} samples")
print(f"Test set size: {len(test_df)} samples")

# In[3]:

print(combined_df['label'].value_counts())

# In[4]:

import matplotlib.pyplot as plt

combined_df['label'].value_counts().plot(kind='bar')
plt.title('Class distribution (full dataset)')
plt.xlabel('Class')
plt.ylabel('Count')
plt.show()

# In[5]:

print(train_df['label'].value_counts())

# In[6]:

train_df['label'].value_counts().plot(kind='bar')
plt.title('Class distribution (training set)')
plt.xlabel('Class')
plt.ylabel('Count')
plt.show()

# In[7]:

import numpy as np
from PIL import Image

# Define how to get the labels
def get_y(row):
    return row['label']  # adjust this depending on how your CSV is structured

# Define the preprocessing applied to every image
def custom_transform(image_path):
    image = cv2.imread(str(image_path))  # Read the image file
    if image is None:
        return None

    # Convert the image from BGR to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Gaussian filter to suppress high-frequency noise
    image = cv2.GaussianBlur(image, (5, 5), 0)

    # Histogram equalization on the luminance channel only
    img_yuv = cv2.cvtColor(image, cv2.COLOR_RGB2YUV)
    img_yuv[:, :, 0] = cv2.equalizeHist(img_yuv[:, :, 0])
    image = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2RGB)

    # Median filter to remove residual salt-and-pepper noise
    image = cv2.medianBlur(image, 3)

    # Bypass filter: intentionally leaves the image unchanged

    # Sharpening filter
    kernel = np.array([[0, -1, 0],
                       [-1, 5, -1],
                       [0, -1, 0]])
    image = cv2.filter2D(image, -1, kernel)

    # Resize the image to the 224x224 input size expected by the model
    image = cv2.resize(image, (224, 224))
    return image

from albumentations import Compose, Rotate, RandomBrightnessContrast, OpticalDistortion

def additional_augmentations(image):
    transform = Compose([
        Rotate(limit=10, p=0.75),                                                    # max_rotate=10.0
        RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.75),  # max_lighting=0.2, p_lighting=0.75
        OpticalDistortion(distort_limit=0.2, shift_limit=0.2, p=0.75),               # max_warp=0.2, p_affine=0.75
        # No flipping is performed, matching do_flip=False and flip_vert=False
    ], p=1)  # p=1 ensures the augmentation pipeline is always applied
    augmented_image = transform(image=image)['image']
    return augmented_image

def get_x(row, is_test=False):
    image_path = path_image_combined / row['id_code']
    transformed_image = custom_transform(image_path)
    # Augment only training images of the minority class (label 1)
    if not is_test and row['label'] == 1:
        transformed_image = additional_augmentations(transformed_image)
    return Image.fromarray(transformed_image)

# Define a DataBlock. All preprocessing happens inside get_x, so no item or batch
# transforms are needed. No splitter is given, so fastai's default RandomSplitter
# holds out 20% of whatever DataFrame is passed to dataloaders() for validation.
dblock = DataBlock(
    blocks=(ImageBlock(cls=PILImage), CategoryBlock),
    get_x=get_x,
    get_y=get_y,
    item_tfms=None,
    batch_tfms=None)

# Create a DataLoaders object for the training data
dls = dblock.dataloaders(train_df, bs=128)
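
# Optional sanity check (an added sketch, not part of the original notebook):
# DataBlock.summary walks one sample through get_x/get_y and prints every step
# of the pipeline, which is useful for debugging if dataloaders() fails.
dblock.summary(train_df)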

# In[8]:

# Print the first few rows of the training DataFrame
print(train_df.head())

# In[9]:

# Rows where 'label' == 0: the majority class in this dataset
majority_class = train_df[train_df['label'] == 0]

# Rows where 'label' == 1: the minority class in this dataset
minority_class = train_df[train_df['label'] == 1]

# In[10]:

# Oversample the minority class (sampling with replacement) until it matches
# the majority class in size
oversampled_minority_class = minority_class.sample(n=len(majority_class), replace=True, random_state=42)

# Concatenate the oversampled minority class with the majority class to create a balanced dataset
oversampled_train_df = pd.concat([majority_class, oversampled_minority_class], axis=0)

# Shuffle the balanced DataFrame so the classes are randomly distributed
oversampled_train_df = oversampled_train_df.sample(frac=1, random_state=42).reset_index(drop=True)

# Rebuild the DataLoaders from the balanced DataFrame with a batch size of 128
dls = dblock.dataloaders(oversampled_train_df, bs=128)

# In[11]:

# Display a batch of data from the training dataloader
dls.show_batch()

# In[12]:

from fastai.metrics import AccumMetric
from sklearn.metrics import roc_auc_score

def custom_roc_auc_score(preds, targs):
    # preds are the raw two-column outputs of a binary model (n_out=2);
    # softmax converts them to probabilities, and the second column is
    # the probability of the positive class
    probs = preds.softmax(dim=-1)[:, 1]
    return roc_auc_score(targs, probs)

# Use this custom metric in the learner
learn = cnn_learner(dls, resnet50,
                    n_out=2,  # binary classification
                    loss_func=CrossEntropyLossFlat(),
                    metrics=[
                        accuracy,
                        Precision(average='binary'),
                        Recall(average='binary'),
                        F1Score(average='binary'),
                        AccumMetric(custom_roc_auc_score, flatten=False)  # custom ROC AUC
                    ],
                    cbs=[
                        EarlyStoppingCallback(monitor='valid_loss', patience=3),
                        SaveModelCallback(monitor='valid_loss', fname='best_model')
                    ])

# In[13]:

# Train the model. The validation loss should typically decrease over epochs;
# training stops early if it fails to improve for three consecutive epochs.
learn.fit_one_cycle(10, 5e-2)
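
# Added sketch (not part of the original notebook): fastai's Recorder stores the
# per-batch training loss and per-epoch validation loss, so the training curve
# from the run above can be inspected directly.
learn.recorder.plot_loss()
plt.show()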

# In[14]:

# Confusion matrix on the validation set
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix(figsize=(8, 8))

# In[15]:

from sklearn.metrics import accuracy_score, precision_score, classification_report

# Build a labelled test DataLoader from the held-out test set
test_dl = dls.test_dl(test_df, with_labels=True)

# Point get_x at test mode so the minority-class augmentations are skipped.
# Caveat: the DataBlock captured the original get_x when it was created, so on
# current fastai versions this attribute assignment may not take effect; if
# augmented test images are a concern, rebuild the DataBlock with
# partial(get_x, is_test=True) instead.
test_dl.dataset.get_x = partial(get_x, is_test=True)

# Get predictions and targets
preds, targs = learn.get_preds(dl=test_dl)

# Convert class probabilities to predicted class indices
preds_argmax = preds.argmax(dim=-1)

# Calculate and print accuracy, precision, and a full classification report
# (local names avoid shadowing fastai's accuracy metric)
test_accuracy = accuracy_score(targs.numpy(), preds_argmax.numpy())
print(f'Accuracy: {test_accuracy * 100:.2f}%')

test_precision = precision_score(targs.numpy(), preds_argmax.numpy())
print(f'Precision: {test_precision * 100:.2f}%')

report = classification_report(targs.numpy(), preds_argmax.numpy())
print(report)

# In[16]:

# Export the trained Learner (model weights plus the data pipeline) to disk
model_export_path = 'D:/Documents/Machine Learning - Glaucoma/your_model.pkl'
learn.export(model_export_path)
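
# Added sketch (not part of the original notebook): the exported .pkl can be
# reloaded for inference with load_learner. Note that the export pickles
# references to get_x, get_y, and custom_transform, so those functions must be
# importable in the process that loads the model. The row passed to predict is
# just an illustrative choice.
learn_inf = load_learner(model_export_path)
pred_class, pred_idx, probs = learn_inf.predict(test_df.iloc[0])
print(f'Predicted class: {pred_class}, probability: {probs[pred_idx]:.4f}')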