#!/usr/bin/env python
# coding: utf-8
# In[1]:
from fastai.data.all import *
from fastai.vision.all import *
import cv2
import os
from pathlib import Path
import pandas as pd
# Load your CSV file into a pandas DataFrame
path_csv_combined = Path('D:\\Documents\\Machine Learning - Glaucoma\\combined_csv.csv')
# Define the image path with images from all three databases combined
path_image_combined = Path('D:\\Documents\\Machine Learning - Glaucoma\\combined images')
# Load dataframe
combined_df = pd.read_csv(path_csv_combined)
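# A quick sanity check on the CSV layout (a minimal sketch; it assumes the
# 'id_code' and 'label' columns that the rest of this script relies on):
expected_cols = {'id_code', 'label'}
missing = expected_cols - set(combined_df.columns)
assert not missing, f'CSV is missing expected columns: {missing}'
print(combined_df.head())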
# In[2]:
from sklearn.model_selection import train_test_split
train_df, test_df = train_test_split(combined_df, test_size=0.15, random_state=42, stratify=combined_df['label'])
train_df, val_df = train_test_split(train_df, test_size=0.15, random_state=42, stratify=train_df['label'])
# Display the sizes of the datasets
print(f"Training set size: {len(train_df)} samples")
print(f"Validation set size: {len(val_df)} samples")
print(f"Test set size: {len(test_df)} samples")
# In[3]:
print(combined_df['label'].value_counts())
# In[4]:
import matplotlib.pyplot as plt
combined_df['label'].value_counts().plot(kind='bar')
plt.title('Class distribution')
plt.xlabel('Class')
plt.ylabel('Count')
plt.show()
# In[5]:
print(train_df['label'].value_counts())
# In[6]:
import matplotlib.pyplot as plt
train_df['label'].value_counts().plot(kind='bar')
plt.title('Class distribution')
plt.xlabel('Class')
plt.ylabel('Count')
plt.show()
# In[7]:
import cv2
import numpy as np
from skimage import filters
from PIL import Image
# Define how to get the labels
def get_y(row):
    return row['label']  # adjust this depending on how your CSV is structured
# Define the transformations
def custom_transform(image_path):
    image = cv2.imread(str(image_path))  # Read the image file
    if image is None:
        return None
    # Convert the image from BGR to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Gaussian filter to suppress high-frequency noise
    image = cv2.GaussianBlur(image, (5, 5), 0)
    # Histogram equalization on the luma channel only
    img_yuv = cv2.cvtColor(image, cv2.COLOR_RGB2YUV)
    img_yuv[:, :, 0] = cv2.equalizeHist(img_yuv[:, :, 0])
    image = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2RGB)
    # Median filter to remove salt-and-pepper noise
    image = cv2.medianBlur(image, 3)
    # Sharpening filter to emphasize edges (e.g. the optic disc boundary)
    kernel = np.array([[0, -1, 0],
                       [-1, 5, -1],
                       [0, -1, 0]])
    image = cv2.filter2D(image, -1, kernel)
    # Resize to the 224x224 input size expected by the ResNet backbone
    image = cv2.resize(image, (224, 224))
    return image
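# Example: run custom_transform on the first training image and display the
# result (a minimal sketch; it assumes the first row's 'id_code' points at a
# readable file in the combined image folder):
sample = custom_transform(path_image_combined / train_df.iloc[0]['id_code'])
if sample is not None:
    plt.imshow(sample)
    plt.axis('off')
    plt.title('Preprocessed sample')
    plt.show()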
from albumentations import Compose, Rotate, RandomBrightnessContrast, OpticalDistortion
def additional_augmentations(image):
    transform = Compose([
        Rotate(limit=10, p=0.75),  # max_rotate=10.0
        RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.75),  # max_lighting=0.2, p_lighting=0.75
        OpticalDistortion(distort_limit=0.2, shift_limit=0.2, p=0.75),  # max_warp=0.2, p_affine=0.75
        # No flipping is performed, as do_flip and flip_vert are both effectively False
    ], p=1)  # p=1 ensures the augmentation pipeline is always applied
    augmented_image = transform(image=image)['image']
    return augmented_image
def get_x(row, is_test=False):
    image_path = path_image_combined / row['id_code']
    transformed_image = custom_transform(image_path)
    if transformed_image is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')
    # Apply extra augmentations only to minority-class training images
    if not is_test and row['label'] == 1:
        transformed_image = additional_augmentations(transformed_image)
    return Image.fromarray(transformed_image)
# Define a DataBlock
dblock = DataBlock(
    blocks=(ImageBlock(cls=PILImage), CategoryBlock),
    get_x=get_x,
    get_y=get_y,
    item_tfms=None,
    batch_tfms=None)
# Create DataLoaders for the training data
dls = dblock.dataloaders(train_df, bs=128)
# In[8]:
# Print the first few rows of the train_df DataFrame
print(train_df.head())
# In[9]:
# Extract all the rows from the training dataset where the 'label' column has a value of 0,
# which represents the majority class in this context.
majority_class = train_df[train_df['label'] == 0]
# Extract all the rows from the training dataset where the 'label' column has a value of 1,
# which represents the minority class in this context.
minority_class = train_df[train_df['label'] == 1]
# In[10]:
# Oversample the minority class to have the same number of samples as the majority class
oversampled_minority_class = minority_class.sample(n=len(majority_class), replace=True, random_state=42)
# Concatenate the oversampled minority class DataFrame with the majority class DataFrame to create a balanced dataset
oversampled_train_df = pd.concat([majority_class, oversampled_minority_class], axis=0)
# Shuffle the oversampled DataFrame to ensure a random distribution of classes
oversampled_train_df = oversampled_train_df.sample(frac=1, random_state=42).reset_index(drop=True)
# Create a DataLoader using the balanced DataFrame and a batch size of 128
dls = dblock.dataloaders(oversampled_train_df, bs=128)
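# Sanity checks on the balanced loader: the two classes should now have
# equal counts, and one batch should come out at the expected shape
# (a sketch that just inspects a single batch):
print(oversampled_train_df['label'].value_counts())
xb, yb = dls.one_batch()
print(f'Batch shapes: {xb.shape}, {yb.shape}; classes: {dls.vocab}')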
# In[11]:
#Display a batch of data from the training dataloader
dls.show_batch()
# In[12]:
from fastai.metrics import AccumMetric
from sklearn.metrics import roc_auc_score
def custom_roc_auc_score(preds, targs):
    # preds come from a binary classifier with n_out=2; take the
    # probability of the positive class (the second column)
    probs = preds[:, 1]
    return roc_auc_score(targs, probs)
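# A tiny self-check of the custom metric with made-up tensors (a sketch;
# perfectly separable predictions should score an AUC of 1.0):
_dummy_preds = torch.tensor([[0.9, 0.1], [0.2, 0.8], [0.7, 0.3], [0.1, 0.9]])
_dummy_targs = torch.tensor([0, 1, 0, 1])
print(custom_roc_auc_score(_dummy_preds, _dummy_targs))  # expected: 1.0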
# Now use this custom metric in the learner
learn = cnn_learner(
    dls, resnet50,
    n_out=2,  # binary classification
    loss_func=CrossEntropyLossFlat(),
    metrics=[
        accuracy,
        Precision(average='binary'),
        Recall(average='binary'),
        F1Score(average='binary'),
        AccumMetric(custom_roc_auc_score, flatten=False)  # custom ROC AUC
    ],
    cbs=[
        EarlyStoppingCallback(monitor='valid_loss', patience=3),
        SaveModelCallback(monitor='valid_loss', fname='best_model')
    ])
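# Optionally, run fastai's learning-rate finder here to sanity-check the
# fixed 5e-2 rate used in the next cell (left commented out so training
# below is unaffected):
# learn.lr_find()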
# In[13]:
# Train the model
# Monitor the loss during training; it should typically decrease over epochs
learn.fit_one_cycle(10, 5e-02)
# In[14]:
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix(figsize=(8,8))
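# It can also help to inspect the most confidently wrong predictions with
# fastai's standard interpretation tooling:
interp.plot_top_losses(9, figsize=(12, 9))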
# In[15]:
from sklearn.metrics import accuracy_score, precision_score, classification_report
# dls is the DataLoaders object built above
test_dl = dls.test_dl(test_df, with_labels=True)
# Point the dataset at a test-mode get_x so minority-class test images skip
# the random augmentations (this assumes fastai consults the attribute at
# load time; if not, rebuild the DataLoader with get_x=partial(get_x, is_test=True))
test_dl.dataset.get_x = partial(get_x, is_test=True)
# Get predictions and targets
preds, targs = learn.get_preds(dl=test_dl)
# Convert probabilities to predicted class indices
preds_argmax = preds.argmax(dim=-1)
# Calculate and print accuracy, precision, and a full per-class report
# (named test_accuracy to avoid shadowing fastai's accuracy metric)
test_accuracy = accuracy_score(targs.numpy(), preds_argmax.numpy())
print(f'Accuracy: {test_accuracy * 100:.2f}%')
test_precision = precision_score(targs.numpy(), preds_argmax.numpy())
print(f'Precision: {test_precision * 100:.2f}%')
report = classification_report(targs.numpy(), preds_argmax.numpy())
print(report)
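# ROC AUC on the held-out test set, mirroring the metric tracked during
# training (positive-class probabilities are in column 1):
test_auc = roc_auc_score(targs.numpy(), preds[:, 1].numpy())
print(f'Test ROC AUC: {test_auc:.4f}')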
# In[16]:
# Export the model to the directory
model_export_path = 'D:/Documents/Machine Learning - Glaucoma/your_model.pkl'
learn.export(model_export_path)
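# Example of reloading the exported model for inference (a minimal sketch;
# 'fundus_image.png' is a placeholder path, not a file shipped with this
# repo, so the calls are left commented out):
# learn_inf = load_learner(model_export_path)
# pred_class, pred_idx, probs = learn_inf.predict(PILImage.create('fundus_image.png'))
# print(pred_class, probs)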