| | |
| | from numpy import isnan |
| | import pandas as pd |
| |
|
| | |
# Root data directory; the placeholder must be replaced with a real path
# (keep the trailing slash, since file names are appended directly to it).
file_path = '<YOUR_DATA_PATH>/'
# Directory the input CSV is read from, located under file_path.
input_file_path = f"{file_path}data_for_model_e_columns/"
| |
|
| |
|
def read_data(file):
    """
    Load a CSV data source into a pandas dataframe
    --------
    :param file: string filename (passed straight to pandas.read_csv)
    :return: dataframe
    """
    return pd.read_csv(file)
| |
|
| |
|
def GOLD_grade(data):
    """
    Calculate GOLD grade for COPD classification using FEV1%
    --------
    Intended to be applied row by row, so data['FEV1%'] is a single value.
    Thresholds: >= 80 -> GOLD 1, 50 to < 80 -> GOLD 2, 30 to < 50 -> GOLD 3,
    < 30 -> GOLD 4.
    :param data: row (Series or mapping) containing the FEV1% column
    :return: GOLD grade string, or None when FEV1% is missing
    """
    fev1_pct = data['FEV1%']

    # A missing value fails every comparison below and would otherwise fall
    # through to the most severe grade, so report it as missing instead.
    if pd.isna(fev1_pct):
        return None

    # Thresholds are checked from highest to lowest, so each branch only
    # needs its lower bound.
    if fev1_pct >= 80:
        return 'GOLD 1'
    if fev1_pct >= 50:
        return 'GOLD 2'
    if fev1_pct >= 30:
        return 'GOLD 3'
    return 'GOLD 4'
| |
|
| |
|
def GOLD_group(data):
    """
    Calculate GOLD group from admissions data, exacerbations data, and CAT data
    --------
    Intended to be applied row by row, so each looked-up field is a single
    value. A row is high risk when it has at least one prior admission or
    more than one exacerbation in the previous year; it is high symptom when
    CAT_baseline >= 10.

        high risk + high symptom -> GOLD group D
        high risk + low symptom  -> GOLD group C
        low risk  + high symptom -> GOLD group B
        low risk  + low symptom  -> GOLD group A

    :param data: row (Series or mapping) containing CAT_baseline, Prior_Ad
                 and exac_prev_year
    :return: GOLD group string, or None when CAT_baseline is missing
    """
    cat_score = data['CAT_baseline']

    # Without a CAT score the symptom axis is unknown; do not default the
    # row to a low-symptom group.
    if pd.isna(cat_score):
        return None

    # The risk test is evaluated as one unit before being combined with the
    # CAT test. Chaining `a & b | c` without parentheses groups as
    # `(a & b) | c`, which would put every frequent exacerbator in group D.
    # Missing Prior_Ad / exac_prev_year compare as False, i.e. they are
    # treated as "no evidence of high risk".
    high_risk = (data['Prior_Ad'] > 0) or (data['exac_prev_year'] > 1)
    high_symptoms = cat_score >= 10

    if high_risk:
        return 'GOLD group D' if high_symptoms else 'GOLD group C'
    return 'GOLD group B' if high_symptoms else 'GOLD group A'
| | |
| |
|
def apply_if_else(data, condition):
    """
    Evaluate a row-wise rule on every row of a dataframe
    --------
    :param data: dataframe
    :param condition: function taking one row and returning that row's value
    :return: Series holding the result of condition for each row
    """
    per_row_result = data.apply(condition, axis='columns')
    return per_row_result
| |
|
| |
|
def main():
    """
    Build the GOLD grade / GOLD group table for the RC_SU cohort and save it
    --------
    Reads "Cohort_characteristics_data_RC_SU.csv" from input_file_path, keeps
    the columns needed for classification, adds the 'GOLD grade' and
    'GOLD group' columns, and writes the result to
    file_path + 'GOLD_data.csv'.
    :return: None
    """
    # Read in the cohort characteristics data
    RC_SU1_characteristics_file = input_file_path + "Cohort_characteristics_data_RC_SU.csv"
    RC_SU1_characteristics_data = read_data(RC_SU1_characteristics_file)

    # Keep only the columns used for GOLD classification. The explicit
    # .copy() makes this an independent dataframe, so adding columns below
    # does not raise SettingWithCopyWarning.
    GOLD_data = RC_SU1_characteristics_data[
        ['ID', 'FEV1%', 'CAT_baseline', 'Prior_Ad', 'exac_prev_year']
    ].copy()

    # Derive the classification columns row by row
    GOLD_data['GOLD grade'] = apply_if_else(GOLD_data, GOLD_grade)
    GOLD_data['GOLD group'] = apply_if_else(GOLD_data, GOLD_group)

    # Save the result (to_csv also writes the dataframe index by default)
    GOLD_data.to_csv(file_path + 'GOLD_data.csv')
| | |
| |
|
# Run the pipeline.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so this call
# also executes whenever the module is imported.
main()