|
|
|
import pandas as pd |
|
|
|
# Location of the input CSV (a relative path, so it is resolved against the
# current working directory).
file_path = "new_data.csv"

# English column labels; they are applied positionally when the CSV is loaded.
columns_english = [
    # Coded categorical fields (decoded through gender_dict / education_levels).
    "Gender",
    "Education",
    # Laboratory measurements.
    "Hemoglobin",
    "Total_Protein",
    "Albumin",
    "HDL",
    "AST",
    "Direct_Bilirubin",
    # Sleep duration and questionnaire scores.
    "Sleep",
    "SF-36",
    "SAS",
    "SDS",
]
|
|
|
# header=0 marks the first line of the file as its header row; because `names`
# is supplied as well, that row is discarded and the columns are labelled with
# columns_english instead.
data = pd.read_csv(
    file_path,
    names=columns_english,
    header=0,
    encoding="ISO-8859-1",
)
|
|
|
# Education code -> label. The codes are consecutive integers starting at 0,
# so the mapping is built by enumerating the labels in code order.
education_levels = dict(
    enumerate(
        (
            "elementary school",    # 0
            "junior high school",   # 1
            "high school/diploma",  # 2
            "bachelor/master/phd",  # 3
        )
    )
)

# Gender code -> label.
gender_dict = {0: "male", 1: "female"}
|
|
|
|
|
|
|
def classify_sas_sds(score, threshold, mild, moderate, severe):
    """Map a questionnaire score onto a severity class from 0 to 3.

    The bands are contiguous, so every comparable score gets a class:

    * ``score < threshold``              -> 0
    * ``threshold <= score <= mild``     -> 1
    * ``mild < score < severe``          -> 2
    * ``score >= severe``                -> 3

    Args:
        score: The score to classify.
        threshold: Lowest score that counts as class 1.
        mild: Highest score (inclusive) that still counts as class 1.
        moderate: Highest nominal score (inclusive) of class 2.
        severe: Lowest score that counts as class 3.

    Returns:
        The class as an ``int`` in ``0..3``, or ``None`` when the score cannot
        be ordered against the bounds (for example NaN from a missing value).

    NOTE(review): ``mild`` and ``moderate`` are treated as inclusive upper
    bounds because the call sites pass adjacent integers for
    ``moderate``/``severe`` (69/70 and 72/73). With exclusive comparisons a
    score equal to ``moderate`` matched no branch at all, and a score equal to
    ``mild`` was pushed into class 2.
    """
    if score < threshold:
        return 0
    if score <= mild:
        return 1
    # `moderate` is the nominal upper bound, but anything short of the
    # `severe` cut-off still counts as class 2 so that fractional scores
    # between the two bounds are not left unclassified.
    if score <= moderate or score < severe:
        return 2
    if score >= severe:
        return 3
    # Every comparison above is False for NaN; report "no class" explicitly
    # instead of falling off the end of the function.
    return None
|
|
|
|
|
|
|
# Placeholder for the "Description" column. It is listed first so the column
# keeps its leading position in the output; the real text is assigned further
# down in the script.
descriptions = ""

# Classify each score by walking the score column itself. DataFrame.iterrows()
# builds a full Series for every row, which is wasted work when only a single
# value per row is read.
new_data = pd.DataFrame({
    "Description": descriptions,
    "SAS_Class": [classify_sas_sds(score, 50, 59, 69, 70) for score in data["SAS"]],
    "SDS_Class": [classify_sas_sds(score, 53, 62, 72, 73) for score in data["SDS"]],
})
|
|
|
|
|
def _describe_patient(row):
    """Render one row of ``data`` as an English sentence.

    The gender and education codes are translated through ``gender_dict`` and
    ``education_levels``; a code that is missing from either mapping raises
    ``KeyError``. All other values are interpolated as the row holds them.
    """
    gender = gender_dict[row["Gender"]]
    education = education_levels[row["Education"]]
    return f"A patient of gender {gender} and educational background {education}, has hemoglobin {row['Hemoglobin']}g/L, total protein {row['Total_Protein']}g/L, albumin {row['Albumin']}g/L, HDL {row['HDL']}mmol/L, AST {row['AST']}U/L, direct bilirubin {row['Direct_Bilirubin']}μmol/L, sleeps {row['Sleep']} hours, and has an SF-36 score of {row['SF-36']}."


# iterrows() is kept on purpose: each row arrives as a Series and the numbers
# are formatted exactly as that Series holds them, so switching to another
# iteration method could change how values are printed.
new_descriptions = [_describe_patient(row) for _, row in data.iterrows()]

# Replace the placeholder column with the generated text (assigned by position).
new_data["Description"] = new_descriptions
|
|
|
|
|
# Write the descriptions and class labels to disk; the DataFrame index is left
# out of the file.
new_csv_path = "processed_new_data.csv"
new_data.to_csv(path_or_buf=new_csv_path, index=False)

# Echo the output location so it is visible where the result was written.
print(new_csv_path)
|
|