annaferrari02's picture
upload files
0fcd8ae verified
"""
Utility functions for surgical instrument classification
"""
import cv2
import numpy as np
from skimage.feature.texture import graycomatrix, graycoprops
from skimage.feature import local_binary_pattern, hog
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import pywt
def preprocess_image(image):
"""
Apply CLAHE preprocessing for better contrast
(Contrast Limited Adaptive Historam Equalization)
"""
# Convert to LAB color space (basically separating lightness, L, from color info)
lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
l, a, b = cv2.split(lab) #this enhances constrast between colors
# Apply CLAHE to L channel
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)) #split into a 8x8 grid and performs the contrast enhancement to the smaller regions instead of full image
l = clahe.apply(l)
# Merge and convert back
enhanced = cv2.merge([l, a, b]) #merge the contrast channel with the other two (A,B)
enhanced = cv2.cvtColor(enhanced, cv2.COLOR_LAB2BGR) #go back to BGR so it can be used later on
return enhanced
#this is the same as baseline code, well working so let's keep it
#it basically computes normalized color histograms for the classic three channels
def rgb_histogram(image, bins=256):
"""Extract RGB histogram features"""
hist_features = []
for i in range(3): # RGB Channels
hist, _ = np.histogram(image[:, :, i], bins=bins, range=(0, 256), density=True)
hist_features.append(hist)
return np.concatenate(hist_features)
def hu_moments(image):
"""Extract Hu moment features, takes BGR format in input
basically provides shape description that are consistent
wrt to position, size and rotation"""
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) #turn to greyscale (works in 1 channel)
moments = cv2.moments(gray)
hu_moments = cv2.HuMoments(moments).flatten()
return hu_moments
def glcm_features(image, distances=[1], angles=[0], levels=256, symmetric=True, normed=True):
"""Extract GLCM texture features,
captures texture info considering spatial
relationship between pixel intensities. works well with RGB and hu"""
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
glcm = graycomatrix(gray, distances=distances, angles=angles, levels=levels,
symmetric=symmetric, normed=normed)
contrast = graycoprops(glcm, 'contrast').flatten()
dissimilarity = graycoprops(glcm, 'dissimilarity').flatten()
homogeneity = graycoprops(glcm, 'homogeneity').flatten()
energy = graycoprops(glcm, 'energy').flatten()
correlation = graycoprops(glcm, 'correlation').flatten()
asm = graycoprops(glcm, 'ASM').flatten()
return np.concatenate([contrast, dissimilarity, homogeneity, energy, correlation, asm])
def local_binary_pattern_features(image, P=8, R=1):
"""Extract Local Binary Pattern features, useful for light changes
combined with rgb, hu and glcm"""
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
lbp = local_binary_pattern(gray, P, R, method='uniform')
(hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, P + 3),
range=(0, P + 2), density=True)
return hist #feature vector representing the texture of the image
def hog_features(image, orientations=12, pixels_per_cell=(16, 16), cells_per_block=(2, 2)):
"""
Extract HOG (Histogram of Oriented Gradients) features
Great for capturing shape and edge information in surgical instruments
"""
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Resize to standard size for consistency
gray_resized = cv2.resize(gray, (256, 256)) #we could try using 256 here and 16,16 cells per block
hog_features_vector = hog(
gray_resized,
orientations=orientations,
pixels_per_cell=pixels_per_cell,
cells_per_block=cells_per_block,
block_norm='L2-Hys',
feature_vector=True
)
return hog_features_vector #Returns a vector capturing local edge
#directions and shape information, useful for detecting instruments,
#objects, or structural patterns.
def luv_histogram(image, bins=32): #instead of bgr it uses lightness and chromatic components
"""
Extract histogram in LUV color space
LUV is perceptually uniform and better for underwater/surgical imaging
"""
luv = cv2.cvtColor(image, cv2.COLOR_BGR2LUV)
hist_features = []
for i in range(3):
hist, _ = np.histogram(luv[:, :, i], bins=bins, range=(0, 256), density=True)
hist_features.append(hist)
return np.concatenate(hist_features)
def gabor_features(image, frequencies=[0.1, 0.2, 0.3],
orientations=[0, 45, 90, 135]):
"""
Extract Gabor filter features (gabor kernels)
texture orientation that deals well with different scales and diff orientation
"""
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) # uses intensity and not color
features = []
for freq in frequencies:
for theta in orientations:
theta_rad = theta * np.pi / 180
kernel = cv2.getGaborKernel((21, 21), 5, theta_rad,
10.0/freq, 0.5, 0)
filtered = cv2.filter2D(gray, cv2.CV_32F, kernel)
features.append(np.mean(filtered))
features.append(np.std(filtered))
return np.array(features)
def wavelet_features(image, wavelet='db4', levels=3):
# try out multi-orientation wavelets (e.g., sym8, coif)
"""
multi-scale wavelet feature extractor
focus on texture + edges.
This function converts an image to grayscale,
performs a multi-level 2D wavelet decomposition,
and extracts texture features from the approximation and detail sub-bands.
It summarizes each band using statistics like mean, standard deviation, and peak magnitude.
The result is a numerical feature vector capturing multi-scale texture and edge
information for tasks like surgical tool detection.
"""
# Convert to grayscale float32 in [0,1]
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY).astype(np.float32) / 255.0
# Ensure the decomposition level is valid for this image
max_level = pywt.dwt_max_level(min(gray.shape), pywt.Wavelet(wavelet).dec_len)
levels = min(levels, max_level)
coeffs = pywt.wavedec2(gray, wavelet=wavelet, level=levels)
features = []
#Approximation Coefficients (LL)
LL = coeffs[0]
LL_abs = np.abs(LL)
features.extend([
LL.mean(),
LL.std(),
LL_abs.max(),
LL_abs.mean(),
])
# Detail Coefficients for each level: (LH, HL, HH)
for (LH, HL, HH) in coeffs[1:]:
for band in (LH, HL, HH):
band_abs = np.abs(band)
features.extend([
band_abs.mean(), # energy-like texture measure
band_abs.std(), # variation in texture
band_abs.max(), # strongest directional edge
np.percentile(band_abs, 95), # robust peak measure
])
return np.array(features, dtype=np.float32)
def extract_features_from_image(image):
"""
Extract enhanced features from image
Uses baseline features + HOG + LUV histogram + Gabor for better performance
Args:
image: Input image (BGR format from cv2.imread)
Returns:
Feature vector as numpy array
"""
# Preprocess image first
image = preprocess_image(image)
# Baseline features
hist_features = rgb_histogram(image)
hu_features = hu_moments(image)
glcm_features_vector = glcm_features(image)
lbp_features = local_binary_pattern_features(image)
# Enhanced features that add discriminative power for complex images
hog_feat = hog_features(image)
luv_hist = luv_histogram(image)
gabor_feat = gabor_features(image)
wavelet_feat = wavelet_features(image)
# Concatenate all features (produces a single vector)
image_features = np.concatenate([
hist_features,
hu_features,
glcm_features_vector,
lbp_features,
hog_feat,
luv_hist,
gabor_feat,
wavelet_feat
])
return image_features # comprehensive numerical representation of the imag
def fit_pca_transformer(data, num_components):
"""
Fit a PCA transformer on training data
Args:
data: Training data (n_samples, n_features)
num_components: Number of PCA components to keep
Returns:
pca_params: Dictionary containing PCA parameters
data_reduced: PCA-transformed data
"""
# Standardize the data
mean = np.mean(data, axis=0)
std = np.std(data, axis=0)
# Avoid division by zero
std[std == 0] = 1.0
data_standardized = (data - mean) / std
# Fit PCA using sklearn
pca_model = PCA(n_components=num_components)
data_reduced = pca_model.fit_transform(data_standardized)
# Create params dictionary
pca_params = {
'pca_model': pca_model,
'mean': mean,
'std': std,
'num_components': num_components,
'feature_dim': data.shape[1],
'explained_variance_ratio': pca_model.explained_variance_ratio_,
'cumulative_variance': np.cumsum(pca_model.explained_variance_ratio_)
}
return pca_params, data_reduced
def apply_pca_transform(data, pca_params):
"""
Apply saved PCA transformation to new data
CRITICAL: This uses the saved mean/std/PCA from training
Args:
data: New data to transform (n_samples, n_features)
pca_params: Dictionary from fit_pca_transformer
Returns:
Transformed data
"""
# Standardize using training mean/std
data_standardized = (data - pca_params['mean']) / pca_params['std']
# Apply PCA transformation
# Projects new data onto the same principal components computed from training data
data_reduced = pca_params['pca_model'].transform(data_standardized)
return data_reduced
def train_svm_model(features, labels, kernel='rbf', C=1.0):
"""
Train an SVM model on ALL available data (no train/test split)
Args:
features: Feature matrix (n_samples, n_features)
labels: Label array (n_samples,)
kernel: SVM kernel type ('linear', 'rbf', 'poly', 'sigmoid')
C: Regularization parameter (smaller = more regularization)
Returns:
Dictionary containing model and metrics
"""
# Check if labels are one-hot encoded
if labels.ndim > 1 and labels.shape[1] > 1:
labels = np.argmax(labels, axis=1)
# Train SVM on ALL data
svm_model = SVC(kernel=kernel, C=C, random_state=56)
svm_model.fit(features, labels)
results = {
'model': svm_model
}
return results