| | """ |
| | Utility functions common across admission processing |
| | (admissions/comorbidities/gples) |
| | """ |
| | import pandas as pd |
| | from utils.common import read_data |
| | from utils.adm_processing import (update_null_stay, calculate_total_stay, |
| | search_diag) |
| |
|
| |
|
def initialize_adm_data(adm_file):
    """
    Read the admission extract and normalise its date columns
    --------
    :param adm_file: admission data file name, forwarded to read_data
    :return: de-duplicated admission dataframe whose ADMDATE and DISDATE
        columns have been converted with pd.to_datetime
    """
    print('Loading admission data')

    # Column name -> type requested from read_data. Insertion order is the
    # order in which both lists are handed to read_data.
    schema = {
        'SafeHavenID': 'int',
        'ETHGRP': 'object',
        'ADMDATE': 'object',
        'DISDATE': 'object',
        'STAY': 'int',
        'DIAG1Desc': 'str',
        'DIAG2Desc': 'str',
        'DIAG3Desc': 'str',
        'DIAG4Desc': 'str',
        'DIAG5Desc': 'str',
        'DIAG6Desc': 'str',
    }
    adm_cols = list(schema)
    adm_types = list(schema.values())
    admissions = read_data(adm_file, adm_cols, adm_types)

    # Remove rows that are exact duplicates across all loaded columns
    admissions = admissions.drop_duplicates()

    # Dates are requested as 'object' above and converted to datetimes here
    for date_col in ('ADMDATE', 'DISDATE'):
        admissions[date_col] = pd.to_datetime(admissions[date_col])

    return admissions
| |
|
| |
|
def correct_stays(df):
    """
    Repair missing STAY values, then recompute stays per patient so that
    transfer admissions are consolidated into single admission occurrences
    --------
    :param df: admission dataframe to be corrected
    :return: dataframe returned by applying calculate_total_stay to each
        SafeHavenID group, re-indexed from zero
    """
    print('Correcting stays')

    # Null handling is delegated to update_null_stay
    filled = update_null_stay(df)

    # Order by patient, then admission and discharge date, so each group is
    # passed to calculate_total_stay in chronological order
    ordered = filled.sort_values(by=['SafeHavenID', 'ADMDATE', 'DISDATE'])
    per_patient = ordered.groupby('SafeHavenID')
    consolidated = per_patient.apply(calculate_total_stay)

    # Discard the index produced by groupby/apply in favour of a fresh
    # default integer index
    # NOTE(review): assumes calculate_total_stay keeps SafeHavenID as a
    # column, since the dropped index is not preserved - confirm
    return consolidated.reset_index(drop=True)
| |
|
| |
|
def track_copd_resp(df):
    """
    Flag COPD and respiratory admissions using search_diag
    --------
    :param df: admission dataframe to be updated
    :return: dataframe after whitespace stripping and after search_diag has
        been run for 'copd' and then for 'resp'
    """
    print('Tracking events')

    def _strip_if_object(column):
        # Only object-dtype columns are stripped; all other columns are
        # passed through untouched
        if column.dtype == 'object':
            return column.str.strip()
        return column

    # Trim leading/trailing whitespace before the diagnosis searches run
    tracked = df.apply(_strip_if_object)

    # Run the diagnosis search once per event type, COPD first
    for event in ('copd', 'resp'):
        tracked = search_diag(tracked, event)

    return tracked