|
from collections import OrderedDict |
|
from typing import Optional |
|
|
|
# Maps each table / report-section name to its integer token type ID.
# IDs are assigned from the position in the tuple below (0, 1, 2, ...), so a
# new name must be appended at the end to keep the existing IDs unchanged.
ed_cxr_token_type_ids = {
    source: token_type_id
    for token_type_id, source in enumerate(
        (
            'medrecon',  # 0
            'edstays',  # 1
            'triage',  # 2
            'vitalsign',  # 3
            'pyxis',  # 4
            'mimic_cxr_2_0_0_metadata',  # 5
            'medrecon_name',  # 6
            'triage_chiefcomplaint',  # 7
            'triage_pain',  # 8
            'vitalsign_pain',  # 9
            'indication',  # 10
            'history',  # 11
            'findings',  # 12
            'impression',  # 13
            'image',  # 14
            'comparison',  # 15
            'previous_findings',  # 16
            'previous_impression',  # 17
            'previous_image',  # 18
        )
    )
}

# Number of token type IDs: the largest assigned ID plus one.
NUM_ED_CXR_TOKEN_TYPE_IDS = 1 + max(ed_cxr_token_type_ids.values())
|
|
|
|
|
class TableConfig:
    """Per-table configuration: filter flags, loading options and column groups.

    Every column-group argument (``index_columns``, ``text_columns``,
    ``value_columns``, ``time_columns``, ``target_sections``) may be given as a
    single column name, an iterable of names, or ``None``/omitted for "no
    columns". Each is stored on the instance as a list that is private to that
    instance.

    Attributes:
        name: Human-readable name of the table.
        hadm_id_filter, stay_id_filter, study_id_filter, subject_id_filter:
            Boolean flags stored as given (presumably select which identifier
            the table is filtered by; the consumer is not part of this class).
        load: Stored as given; ``None`` when unspecified.
        groupby: Column name stored as given; ``None`` when unspecified.
        index_columns_source: The index column names as passed in.
        index_columns: ``index_columns_source`` with ``'_index'`` appended to
            each name.
        text_columns, value_columns, time_columns, target_sections: Normalised
            lists of names.
        use_start_time, mimic_cxr_sectioned: Boolean flags stored as given.
        value_column_to_idx: Starts as an empty dict (filled in elsewhere).
        total_indices: Starts as ``None`` (set elsewhere).
    """

    def __init__(
        self,
        name: str,
        hadm_id_filter: bool = False,
        stay_id_filter: bool = False,
        study_id_filter: bool = False,
        subject_id_filter: bool = True,
        load: Optional[bool] = None,
        groupby: Optional[str] = None,
        index_columns: Optional[list] = None,
        text_columns: Optional[list] = None,
        value_columns: Optional[list] = None,
        time_columns: Optional[list] = None,
        target_sections: Optional[list] = None,
        use_start_time: bool = False,
        mimic_cxr_sectioned: bool = False,
    ):
        self.name = name
        self.hadm_id_filter = hadm_id_filter
        self.stay_id_filter = stay_id_filter
        self.study_id_filter = study_id_filter
        self.subject_id_filter = subject_id_filter
        self.load = load
        self.groupby = groupby

        # The column groups previously defaulted to shared ``[]`` objects and
        # stored caller lists without copying, so mutating one instance's list
        # silently changed other instances. Normalise to fresh lists instead.
        self.index_columns_source = self._as_list(index_columns)
        self.index_columns = [f'{i}_index' for i in self.index_columns_source]
        self.text_columns = self._as_list(text_columns)
        self.value_columns = self._as_list(value_columns)
        # Always a list after normalisation, so no separate type assertion is
        # needed for the time columns.
        self.time_columns = self._as_list(time_columns)
        self.target_sections = self._as_list(target_sections)

        self.use_start_time = use_start_time
        self.mimic_cxr_sectioned = mimic_cxr_sectioned

        self.value_column_to_idx = {}
        self.total_indices = None

    @staticmethod
    def _as_list(columns) -> list:
        """Return ``columns`` as a new list (``None`` -> ``[]``, ``str`` -> ``[str]``)."""
        if columns is None:
            return []
        if isinstance(columns, str):
            # A bare string is a single column name, not an iterable of characters.
            return [columns]
        return list(columns)
|
|
|
|
|
|
|
""" |
|
Order the tables for position_ids based on their order of occurrence (for cases where their time deltas match).
|
The way that they are ordered here is the way that they will be ordered as input. |
|
|
|
1. medrecon - the medications which the patient was taking prior to their ED stay. |
|
2. edstays - patient stays are tracked in the edstays table. |
|
3. triage - information collected from the patient at the time of triage. |
|
4. vitalsign - aperiodic vital signs documented for patients during their stay. |
|
5. pyxis - dispensation information for medications provided by the BD Pyxis MedStation (position is interchangeable with 4).
|
""" |
|
# ED module tables keyed by table name. The OrderedDict keeps the entries in
# the order they are listed here. 'diagnosis' leaves `load` unset (None).
ed_module_tables = OrderedDict(
    [
        (
            'medrecon',
            TableConfig(
                'Medicine reconciliation',
                stay_id_filter=True,
                load=True,
                groupby='stay_id',
                index_columns=['gsn', 'ndc', 'etc_rn', 'etccode'],
                text_columns='name',
                use_start_time=True,
            ),
        ),
        (
            'edstays',
            TableConfig(
                'ED admissions',
                stay_id_filter=True,
                load=True,
                groupby='stay_id',
                index_columns=['gender', 'race', 'arrival_transport'],
                time_columns='intime',
            ),
        ),
        (
            'triage',
            TableConfig(
                'Triage',
                stay_id_filter=True,
                load=True,
                groupby='stay_id',
                text_columns=['chiefcomplaint', 'pain'],
                value_columns=['temperature', 'heartrate', 'resprate', 'o2sat', 'sbp', 'dbp', 'acuity'],
                use_start_time=True,
            ),
        ),
        (
            'vitalsign',
            TableConfig(
                'Aperiodic vital signs',
                stay_id_filter=True,
                load=True,
                groupby='charttime',
                index_columns=['rhythm'],
                text_columns=['pain'],
                value_columns=['temperature', 'heartrate', 'resprate', 'o2sat', 'sbp', 'dbp'],
                time_columns='charttime',
            ),
        ),
        (
            'pyxis',
            TableConfig(
                'Dispensation information for medications provided by the BD Pyxis MedStation',
                stay_id_filter=True,
                load=True,
                groupby='charttime',
                index_columns=['med_rn', 'name', 'gsn_rn', 'gsn'],
                time_columns='charttime',
            ),
        ),
        (
            'diagnosis',
            TableConfig('Diagnosis', stay_id_filter=True, hadm_id_filter=False),
        ),
    ]
)
|
|
|
|
|
# MIMIC-CXR tables keyed by table name. The OrderedDict keeps the entries in
# the order they are listed here. The CheXpert and split tables leave `load`
# unset (None).
mimic_cxr_tables = OrderedDict(
    [
        (
            'mimic_cxr_2_0_0_metadata',
            TableConfig(
                'Metadata',
                study_id_filter=True,
                load=True,
                groupby='study_id',
                index_columns=[
                    'PerformedProcedureStepDescription',
                    'ViewPosition',
                    'ProcedureCodeSequence_CodeMeaning',
                    'ViewCodeSequence_CodeMeaning',
                    'PatientOrientationCodeSequence_CodeMeaning',
                ],
            ),
        ),
        (
            'mimic_cxr_sectioned',
            TableConfig(
                'Report sections',
                subject_id_filter=False,
                mimic_cxr_sectioned=True,
                load=True,
                groupby='study',
                text_columns=['indication', 'history', 'comparison'],
                target_sections=['findings', 'impression'],
            ),
        ),
        (
            'mimic_cxr_2_0_0_chexpert',
            TableConfig('CheXpert', study_id_filter=True),
        ),
        (
            'mimic_cxr_2_0_0_split',
            TableConfig('Split', study_id_filter=True),
        ),
    ]
)
|
|
|
|