Spaces:

IDKHowToCodeFr
/

tinyml-backend

Sleeping

App Files Files Community

tinyml-backend / preprocessing.py

IDKHowToCodeFr

Upload folder using huggingface_hub

f7b331f verified about 1 month ago

Raw

History Blame Contribute Delete

2.71 kB

	import pandas as pd
	import numpy as np
	import os
	import joblib
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler, LabelEncoder

	def resolve_model_dir():
	# Resolve relative paths universally whether Docker or local execution
	return '/app/model' if os.path.exists('/app/model') else '../model' if os.path.exists('../model') else 'model'

	def preprocess_data(df, is_training=True):
	model_dir = resolve_model_dir()
	scaler_path = f'{model_dir}/scaler.pkl'
	label_encoder_path = f'{model_dir}/label_encoder.pkl'

	columns_to_drop = ['Patient Number', 'Data Accuracy (%)', 'Heart Rate Alert', 'SpO2 Level Alert', 'Blood Pressure Alert', 'Temperature Alert']
	cols_drop = [c for c in columns_to_drop if c in df.columns]
	X_raw = df.drop(columns=cols_drop)

	y = None
	if 'Predicted Disease' in X_raw.columns:
	y_raw = X_raw['Predicted Disease']
	X_raw = X_raw.drop(columns=['Predicted Disease'])

	if is_training:
	label_encoder = LabelEncoder()
	y = label_encoder.fit_transform(y_raw)
	os.makedirs(os.path.dirname(label_encoder_path), exist_ok=True)
	joblib.dump(label_encoder, label_encoder_path)
	joblib.dump(label_encoder.classes_, f'{model_dir}/classes.pkl')
	else:
	if os.path.exists(label_encoder_path):
	label_encoder = joblib.load(label_encoder_path)
	y = label_encoder.transform(y_raw)
	else:
	y = y_raw

	if 'Fall Detection' in X_raw.columns:
	X_raw['Fall Detection'] = X_raw['Fall Detection'].map({'Yes': 1, 'No': 0}).fillna(0)

	X_raw.fillna(X_raw.mean(), inplace=True)

	# Engineered Feature
	X_raw['Risk_Severity'] = (X_raw['Heart Rate (bpm)'] > 105).astype(int) + (X_raw['SpO2 Level (%)'] < 94).astype(int)

	continuous_features = ['Heart Rate (bpm)', 'SpO2 Level (%)', 'Systolic Blood Pressure (mmHg)', 'Diastolic Blood Pressure (mmHg)', 'Body Temperature (°C)', 'Risk_Severity']

	if is_training:
	scaler = StandardScaler()
	X_raw[continuous_features] = scaler.fit_transform(X_raw[continuous_features])
	joblib.dump(scaler, scaler_path)
	else:
	if os.path.exists(scaler_path):
	scaler = joblib.load(scaler_path)
	X_raw[continuous_features] = scaler.transform(X_raw[continuous_features])

	return X_raw, y

	def get_train_test_split(df):
	X, y = preprocess_data(df, is_training=True)
	return train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)