Murmur-API / helper_code.py
Stroke-ia's picture
Upload helper_code.py
1d67cae verified
#!/usr/bin/env python
# Do *not* edit this script.
# These are helper functions that you can use with your code.
import os, numpy as np, scipy as sp, scipy.io, scipy.io.wavfile
# Check if a variable is a number or represents a number.
def is_number(x):
try:
float(x)
return True
except (ValueError, TypeError):
return False
# Check if a variable is an integer or represents an integer.
def is_integer(x):
if is_number(x):
return float(x).is_integer()
else:
return False
# Check if a variable is a a finite number or represents a finite number.
def is_finite_number(x):
if is_number(x):
return np.isfinite(float(x))
else:
return False
# Compare normalized strings.
def compare_strings(x, y):
try:
return str(x).strip().casefold()==str(y).strip().casefold()
except AttributeError: # For Python 2.x compatibility
return str(x).strip().lower()==str(y).strip().lower()
# Find patient data files.
def find_patient_files(data_folder):
# Find patient files.
filenames = list()
for f in sorted(os.listdir(data_folder)):
root, extension = os.path.splitext(f)
if not root.startswith('.') and extension=='.txt':
filename = os.path.join(data_folder, f)
filenames.append(filename)
# To help with debugging, sort numerically if the filenames are integers.
roots = [os.path.split(filename)[1][:-4] for filename in filenames]
if all(is_integer(root) for root in roots):
filenames = sorted(filenames, key=lambda filename: int(os.path.split(filename)[1][:-4]))
return filenames
# Load patient data as a string.
def load_patient_data(filename):
with open(filename, 'r') as f:
data = f.read()
return data
# Load a WAV file.
def load_wav_file(filename):
frequency, recording = sp.io.wavfile.read(filename)
return recording, frequency
# Load recordings.
def load_recordings(data_folder, data, get_frequencies=False):
num_locations = get_num_locations(data)
recording_information = data.split('\n')[1:num_locations+1]
recordings = list()
frequencies = list()
for i in range(num_locations):
entries = recording_information[i].split(' ')
recording_file = entries[2]
filename = os.path.join(data_folder, recording_file)
recording, frequency = load_wav_file(filename)
recordings.append(recording)
frequencies.append(frequency)
if get_frequencies:
return recordings, frequencies
else:
return recordings
# Get patient ID from patient data.
def get_patient_id(data):
patient_id = None
for i, l in enumerate(data.split('\n')):
if i==0:
try:
patient_id = l.split(' ')[0]
except:
pass
else:
break
return patient_id
# Get number of recording locations from patient data.
def get_num_locations(data):
num_locations = None
for i, l in enumerate(data.split('\n')):
if i==0:
try:
num_locations = int(l.split(' ')[1])
except:
pass
else:
break
return num_locations
# Get frequency from patient data.
def get_frequency(data):
frequency = None
for i, l in enumerate(data.split('\n')):
if i==0:
try:
frequency = float(l.split(' ')[2])
except:
pass
else:
break
return frequency
# Get recording locations from patient data.
def get_locations(data):
num_locations = get_num_locations(data)
locations = list()
for i, l in enumerate(data.split('\n')):
entries = l.split(' ')
if i==0:
pass
elif 1<=i<=num_locations:
locations.append(entries[0])
else:
break
return locations
# Get age from patient data.
def get_age(data):
age = None
for l in data.split('\n'):
if l.startswith('#Age:'):
try:
age = l.split(': ')[1].strip()
except:
pass
return age
# Get sex from patient data.
def get_sex(data):
sex = None
for l in data.split('\n'):
if l.startswith('#Sex:'):
try:
sex = l.split(': ')[1].strip()
except:
pass
return sex
# Get height from patient data.
def get_height(data):
height = None
for l in data.split('\n'):
if l.startswith('#Height:'):
try:
height = float(l.split(': ')[1].strip())
except:
pass
return height
# Get weight from patient data.
def get_weight(data):
weight = None
for l in data.split('\n'):
if l.startswith('#Weight:'):
try:
weight = float(l.split(': ')[1].strip())
except:
pass
return weight
# Get pregnancy status from patient data.
def get_pregnancy_status(data):
is_pregnant = None
for l in data.split('\n'):
if l.startswith('#Pregnancy status:'):
try:
is_pregnant = bool(sanitize_binary_value(l.split(': ')[1].strip()))
except:
pass
return is_pregnant
# Get murmur from patient data.
def get_murmur(data):
murmur = None
for l in data.split('\n'):
if l.startswith('#Murmur:'):
try:
murmur = l.split(': ')[1]
except:
pass
if murmur is None:
raise ValueError('No murmur available. Is your code trying to load labels from the hidden data?')
return murmur
# Get outcome from patient data.
def get_outcome(data):
outcome = None
for l in data.split('\n'):
if l.startswith('#Outcome:'):
try:
outcome = l.split(': ')[1]
except:
pass
if outcome is None:
raise ValueError('No outcome available. Is your code trying to load labels from the hidden data?')
return outcome
# Sanitize binary values from Challenge outputs.
def sanitize_binary_value(x):
x = str(x).replace('"', '').replace("'", "").strip() # Remove any quotes or invisible characters.
if (is_finite_number(x) and float(x)==1) or (x in ('True', 'true', 'T', 't')):
return 1
else:
return 0
# Santize scalar values from Challenge outputs.
def sanitize_scalar_value(x):
x = str(x).replace('"', '').replace("'", "").strip() # Remove any quotes or invisible characters.
if is_finite_number(x) or (is_number(x) and np.isinf(float(x))):
return float(x)
else:
return 0.0
# Save Challenge outputs.
def save_challenge_outputs(filename, patient_id, classes, labels, probabilities):
# Format Challenge outputs.
patient_string = '#{}'.format(patient_id)
class_string = ','.join(str(c) for c in classes)
label_string = ','.join(str(l) for l in labels)
probabilities_string = ','.join(str(p) for p in probabilities)
output_string = patient_string + '\n' + class_string + '\n' + label_string + '\n' + probabilities_string + '\n'
# Write the Challenge outputs.
with open(filename, 'w') as f:
f.write(output_string)
# Load Challenge outputs.
def load_challenge_outputs(filename):
with open(filename, 'r') as f:
for i, l in enumerate(f):
if i==0:
patient_id = l.replace('#', '').strip()
elif i==1:
classes = tuple(entry.strip() for entry in l.split(','))
elif i==2:
labels = tuple(sanitize_binary_value(entry) for entry in l.split(','))
elif i==3:
probabilities = tuple(sanitize_scalar_value(entry) for entry in l.split(','))
else:
break
return patient_id, classes, labels, probabilities