ParkinsonDetection / train_model.py
Genos77's picture
first commit
e9ee222
import os
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import noisereduce as nr
import random
import shutil
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
DATA_SOURCE_PATH = 'data'
SPECTROGRAM_PATH = 'spectrograms_stft_5s_grayscale'
MODEL_SAVE_PATH = 'parkinson_cnn_model_stft_grayscale.h5'
IMG_HEIGHT, IMG_WIDTH = 224, 224
BATCH_SIZE = 32
TARGET_DURATION_S = 5
# Data Augmentation
def augment_audio(y, sr):
y_aug = y.copy()
pitch_steps = random.uniform(-2, 2)
y_aug = librosa.effects.pitch_shift(y_aug, sr=sr, n_steps=pitch_steps)
stretch_rate = random.uniform(0.9, 1.1)
y_aug = librosa.effects.time_stretch(y_aug, rate=stretch_rate)
noise_amp = 0.005 * np.random.uniform() * np.amax(y)
y_aug = y_aug + noise_amp * np.random.normal(size=len(y_aug))
return y_aug
# Spectrogram Creation
def create_stft_spectrogram(audio_file, save_path, augment=False):
"""
Creates a high-quality GRAYSCALE spectrogram from a standardized 5s audio segment.
"""
try:
y, sr = librosa.load(audio_file, sr=None)
target_samples = TARGET_DURATION_S * sr
if len(y) > target_samples:
start_index = int((len(y) - target_samples) / 2)
y_segment = y[start_index : start_index + target_samples]
else:
y_segment = librosa.util.pad_center(y, size=target_samples)
if augment:
y_segment = augment_audio(y_segment, sr)
y_reduced = nr.reduce_noise(y=y_segment, sr=sr)
N_FFT = 1024
HOP_LENGTH = 256
S_audio = librosa.stft(y_reduced, n_fft=N_FFT, hop_length=HOP_LENGTH)
Y_db = librosa.amplitude_to_db(np.abs(S_audio), ref=np.max)
plt.figure(figsize=(12, 4))
librosa.display.specshow(Y_db, sr=sr, hop_length=HOP_LENGTH, x_axis='time', y_axis='log', cmap='gray_r')
plt.axis('off')
plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
plt.close()
return True
except Exception as e:
print(f" - Error processing {audio_file}: {e}")
return False
#Data Preparation
def process_all_audio_files():
if os.path.exists(SPECTROGRAM_PATH):
shutil.rmtree(SPECTROGRAM_PATH)
print(f"Starting audio to Grayscale STFT Spectrogram conversion ({TARGET_DURATION_S}s)...")
for split in ['train', 'validation']:
for category in ['parkinson', 'healthy']:
os.makedirs(os.path.join(SPECTROGRAM_PATH, split, category), exist_ok=True)
for category in ['parkinson', 'healthy']:
source_dir = os.path.join(DATA_SOURCE_PATH, category)
all_files = [f for f in os.listdir(source_dir) if f.lower().endswith(('.wav', '.mp3'))]
if not all_files: continue
random.shuffle(all_files)
split_index = int(len(all_files) * 0.8)
train_files, validation_files = all_files[:split_index], all_files[split_index:]
print(f"--- Processing Category: {category} ---")
for filename in train_files:
file_path = os.path.join(source_dir, filename)
base_name = os.path.splitext(filename)[0]
for i in range(3):
save_path = os.path.join(SPECTROGRAM_PATH, 'train', category, f"{base_name}_aug_{i}.png")
create_stft_spectrogram(file_path, save_path, augment=(i > 0))
for filename in validation_files:
file_path = os.path.join(source_dir, filename)
base_name = os.path.splitext(filename)[0]
save_path = os.path.join(SPECTROGRAM_PATH, 'validation', category, f"{base_name}.png")
create_stft_spectrogram(file_path, save_path, augment=False)
print("Spectrogram generation complete.")
def train_cnn_model():
"""
Trains a CNN model optimized for grayscale spectrograms.
"""
if not os.path.exists(SPECTROGRAM_PATH):
print("Spectrograms not found.")
return
train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
os.path.join(SPECTROGRAM_PATH, 'train'),
target_size=(IMG_HEIGHT, IMG_WIDTH),
batch_size=BATCH_SIZE,
class_mode='binary',
color_mode='grayscale' # Tells Keras to load images with 1 channel
)
validation_generator = validation_datagen.flow_from_directory(
os.path.join(SPECTROGRAM_PATH, 'validation'),
target_size=(IMG_HEIGHT, IMG_WIDTH),
batch_size=BATCH_SIZE,
class_mode='binary',
color_mode='grayscale'
)
if not train_generator.samples > 0:
print("Error: No training images were generated.")
return
model = Sequential([
Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 1), padding='same'),
BatchNormalization(),
MaxPooling2D((2, 2)),
Conv2D(64, (3, 3), activation='relu', padding='same'),
BatchNormalization(),
MaxPooling2D((2, 2)),
Conv2D(128, (3, 3), activation='relu', padding='same'),
BatchNormalization(),
MaxPooling2D((2, 2)),
Flatten(),
Dense(256, activation='relu'),
BatchNormalization(),
Dropout(0.5),
Dense(128, activation='relu'),
Dropout(0.4),
Dense(1, activation='sigmoid')
])
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
model.summary()
callbacks_list = [
EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7),
ModelCheckpoint(MODEL_SAVE_PATH, monitor='val_accuracy', save_best_only=True, mode='max')
]
model.fit(
train_generator,
epochs=100,
validation_data=validation_generator,
callbacks=callbacks_list
)
print(f"Grayscale model training complete. Best model saved to {MODEL_SAVE_PATH}")
if __name__ == '__main__':
process_all_audio_files()
train_cnn_model()