#!/usr/bin/env python
# coding: utf-8

# In[1]:

from fastai.data.all import *
from fastai.vision.all import *
import cv2
import os
from pathlib import Path
import pandas as pd

# Load the CSV file into a pandas DataFrame
path_csv_combined = Path('D:\\Documents\\Machine Learning - Glaucoma\\combined_csv.csv')

# Define the image path with images from all three databases combined
path_image_combined = Path('D:\\Documents\\Machine Learning - Glaucoma\\combined images')

# Load the DataFrame
combined_df = pd.read_csv(path_csv_combined)
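
# Optional sanity check (a minimal sketch, assuming the 'id_code' column holds image
# filenames relative to path_image_combined, as the get_x function defined later expects):
# confirm every referenced image file exists before building any dataloaders.
missing = [f for f in combined_df['id_code'] if not (path_image_combined / f).exists()]
print(f"Missing image files: {len(missing)}")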

# In[2]:

from sklearn.model_selection import train_test_split

# Hold out 15% of the data as a test set, then carve a validation set out of the remainder;
# both splits are stratified on the label to preserve class proportions
train_df, test_df = train_test_split(combined_df, test_size=0.15, random_state=42, stratify=combined_df['label'])
train_df, val_df = train_test_split(train_df, test_size=0.15, random_state=42, stratify=train_df['label'])

# Display the sizes of the datasets
print(f"Training set size: {len(train_df)} samples")
print(f"Validation set size: {len(val_df)} samples")
print(f"Test set size: {len(test_df)} samples")

# In[3]:

print(combined_df['label'].value_counts())

# In[4]:

import matplotlib.pyplot as plt

combined_df['label'].value_counts().plot(kind='bar')
plt.title('Class distribution')
plt.xlabel('Class')
plt.ylabel('Count')
plt.show()

# In[5]:

print(train_df['label'].value_counts())

# In[6]:

import matplotlib.pyplot as plt

train_df['label'].value_counts().plot(kind='bar')
plt.title('Class distribution (training set)')
plt.xlabel('Class')
plt.ylabel('Count')
plt.show()

# In[7]:

import cv2
import numpy as np
from skimage import filters
from PIL import Image

# Define how to get the labels
def get_y(row):
    return row['label']  # adjust this depending on how your CSV is structured

# Define the preprocessing applied to every image
def custom_transform(image_path):
    image = cv2.imread(str(image_path))  # Read the image file
    if image is None:
        return None
    # Convert the image from BGR to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Apply filters and transformations
    # Gaussian filter
    image = cv2.GaussianBlur(image, (5, 5), 0)
    # Histogram equalization on the luminance channel
    img_yuv = cv2.cvtColor(image, cv2.COLOR_RGB2YUV)
    img_yuv[:, :, 0] = cv2.equalizeHist(img_yuv[:, :, 0])
    image = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2RGB)
    # Median filter
    image = cv2.medianBlur(image, 3)
    # Bypass filter (leaving the image unchanged; add a specific implementation here if needed)
    # Sharpening filter
    kernel = np.array([[0, -1, 0],
                       [-1, 5, -1],
                       [0, -1, 0]])
    image = cv2.filter2D(image, -1, kernel)
    # Resize the image to the target size of 224x224 pixels
    image = cv2.resize(image, (224, 224))
    return image
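
# Quick visual check of the preprocessing pipeline: run custom_transform on the first
# image listed in the CSV and display it (a minimal sketch, assuming 'id_code' holds
# the image filename relative to path_image_combined).
sample_path = path_image_combined / combined_df['id_code'].iloc[0]
sample_img = custom_transform(sample_path)
if sample_img is not None:
    plt.imshow(sample_img)
    plt.axis('off')
    plt.show()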

from albumentations import Compose, Rotate, RandomBrightnessContrast, OpticalDistortion

def additional_augmentations(image):
    transform = Compose([
        Rotate(limit=10, p=0.75),  # max_rotate=10.0
        RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.75),  # max_lighting=0.2, p_lighting=0.75
        OpticalDistortion(distort_limit=0.2, shift_limit=0.2, p=0.75),  # max_warp=0.2, p_affine=0.75
        # No flipping is performed as do_flip and flip_vert are both set to False
    ], p=1)  # p=1 ensures that the augmentation pipeline is always applied
    augmented_image = transform(image=image)['image']
    return augmented_image
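
# Sanity check for the augmentation pipeline (a small sketch reusing sample_img from the
# preprocessing check above): augment one preprocessed image and display the result.
if sample_img is not None:
    plt.imshow(additional_augmentations(sample_img))
    plt.axis('off')
    plt.show()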

def get_x(row, is_test=False):
    image_path = path_image_combined / row['id_code']
    transformed_image = custom_transform(image_path)
    if transformed_image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # Check the label of the current image and apply augmentations if it belongs to the minority class
    if not is_test and row['label'] == 1:
        transformed_image = additional_augmentations(transformed_image)
    return Image.fromarray(transformed_image)

# Define a DataBlock
dblock = DataBlock(
    blocks=(ImageBlock(cls=PILImage), CategoryBlock),
    get_x=get_x,
    get_y=get_y,
    item_tfms=None,
    batch_tfms=None)

# Create a DataLoaders object for the training data
# Note: no splitter is passed, so the DataBlock falls back to its default RandomSplitter
# (a random 80/20 split of train_df); the val_df created earlier is not used here
dls = dblock.dataloaders(train_df, bs=128)

# In[8]:

# Print the first few rows of the 'train_df' DataFrame
print(train_df.head())

# In[9]:

# Extract all the rows from the training dataset where the 'label' column has a value of 0,
# which represents the majority class in this context.
majority_class = train_df[train_df['label'] == 0]

# Extract all the rows from the training dataset where the 'label' column has a value of 1,
# which represents the minority class in this context.
minority_class = train_df[train_df['label'] == 1]

# In[10]:

# Oversample the minority class to have the same number of samples as the majority class
oversampled_minority_class = minority_class.sample(n=len(majority_class), replace=True, random_state=42)

# Concatenate the oversampled minority class with the majority class to create a balanced dataset
oversampled_train_df = pd.concat([majority_class, oversampled_minority_class], axis=0)

# Shuffle the oversampled DataFrame to ensure a random distribution of classes
oversampled_train_df = oversampled_train_df.sample(frac=1, random_state=42).reset_index(drop=True)

# Create a DataLoaders object using the balanced DataFrame and a batch size of 128
dls = dblock.dataloaders(oversampled_train_df, bs=128)
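
# Verify that the oversampling produced a balanced training DataFrame
# (a minimal check; both classes should now have the same count).
print(oversampled_train_df['label'].value_counts())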

# In[11]:

# Display a batch of data from the training dataloader
dls.show_batch()

# In[12]:

from fastai.metrics import AccumMetric
from sklearn.metrics import roc_auc_score

def custom_roc_auc_score(preds, targs):
    # preds come from a binary classification model with n_out=2; convert the raw outputs
    # to probabilities and take the probability of the positive class (the second column)
    probs = preds.softmax(dim=-1)[:, 1]
    return roc_auc_score(targs, probs)

# Now use this custom metric in the learner
# (cnn_learner is the older name; recent fastai versions also expose it as vision_learner)
learn = cnn_learner(dls, resnet50,
                    n_out=2,  # For binary classification
                    loss_func=CrossEntropyLossFlat(),
                    metrics=[
                        accuracy,
                        Precision(average='binary'),
                        Recall(average='binary'),
                        F1Score(average='binary'),
                        AccumMetric(custom_roc_auc_score, flatten=False)  # Custom ROC AUC
                    ],
                    cbs=[
                        EarlyStoppingCallback(monitor='valid_loss', patience=3),
                        SaveModelCallback(monitor='valid_loss', fname='best_model')
                    ]
                    )
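
# Optional: run fastai's learning-rate finder to sanity-check the learning rate used in
# the next cell (a minimal sketch; lr_find restores the model's weights afterwards).
learn.lr_find()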

# In[13]:

# Train the model
# Monitor the loss during training; it should typically decrease over epochs
learn.fit_one_cycle(10, 5e-02)

# In[14]:

interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix(figsize=(8, 8))
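
# Inspect the validation images the model gets most wrong (a small sketch using
# fastai's built-in plot_top_losses on the same interpretation object).
interp.plot_top_losses(9)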

# In[15]:

from functools import partial
from sklearn.metrics import accuracy_score, precision_score, classification_report

# Build a test DataLoader from the held-out test DataFrame
test_dl = dls.test_dl(test_df, with_labels=True)

# Point the DataLoader's get_x at the test-time variant so the minority-class
# augmentations are not applied during evaluation
test_dl.dataset.get_x = partial(get_x, is_test=True)

# Get predictions and targets
preds, targs = learn.get_preds(dl=test_dl)

# Get the predicted class indices
preds_argmax = preds.argmax(dim=-1)

# Calculate and print accuracy, precision, and a full classification report
accuracy = accuracy_score(targs.numpy(), preds_argmax.numpy())
print(f'Accuracy: {accuracy * 100:.2f}%')

precision = precision_score(targs.numpy(), preds_argmax.numpy())
print(f'Precision: {precision * 100:.2f}%')

report = classification_report(targs.numpy(), preds_argmax.numpy())
print(report)
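
# Also report ROC AUC on the test set, using the predicted probability of the positive
# class (with CrossEntropyLossFlat, get_preds returns softmax probabilities).
print(f'ROC AUC: {roc_auc_score(targs.numpy(), preds[:, 1].numpy()):.4f}')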

# In[16]:

import os
from fastai.vision.all import *

# Export the trained Learner to disk
model_export_path = 'D:/Documents/Machine Learning - Glaucoma/your_model.pkl'
learn.export(model_export_path)
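
# Sketch of loading the exported model back for inference (an assumption-laden example:
# load_learner needs get_x, get_y, custom_transform and additional_augmentations to be
# defined in the loading environment, since the export pickles references to them;
# the sample item simply reuses the first row of the held-out test DataFrame).
learn_inf = load_learner(model_export_path)
sample_row = test_df.iloc[0]
pred_class, pred_idx, probs = learn_inf.predict(sample_row)
print(pred_class, probs)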