Spaces:

Tokymin
/

Mood_Anxiety_Disorder_Classify

Sleeping

App Files Files Community

Mood_Anxiety_Disorder_Classify / dataset /process_scripts_transfer.py

Tokymin

编写了数据处理已经预训练的基础代码

1f4f3bd 8 months ago

raw

history blame

2.33 kB

	# Reload the dataset now that the exact English column names and the handling of the SAS and SDS columns are known
	import pandas as pd

	# English column names, applied positionally to the columns of the CSV.
	columns_english = [
	    "Gender",
	    "Education",
	    "Hemoglobin",
	    "Total_Protein",
	    "Albumin",
	    "HDL",
	    "AST",
	    "Direct_Bilirubin",
	    "Sleep",
	    "SF-36",
	    "SAS",
	    "SDS",
	]
	file_path = "new_data.csv"
	# The file itself is unchanged, so simply reload it under the new column
	# names: header=0 discards the file's own header row in favour of `names`.
	data = pd.read_csv(
	    file_path,
	    names=columns_english,
	    header=0,
	    encoding='ISO-8859-1',
	)
	# English wording for each education-level code (the code is the position
	# in this tuple, starting at 0).
	education_levels = dict(enumerate((
	    "elementary school",
	    "junior high school",
	    "high school/diploma",
	    "bachelor/master/phd",
	)))
	# English wording for each gender code (0 and 1).
	gender_dict = dict(enumerate(("male", "female")))


	# 定义SAS和SDS得分的分类
	def classify_sas_sds(score, threshold, mild, moderate, severe):
	    """Map a SAS/SDS score to a severity class.

	    ``mild`` and ``moderate`` are treated as the highest score that still
	    belongs to that band, which is how this script passes them
	    (50, 59, 69, 70 for SAS and 53, 62, 72, 73 for SDS). Every orderable
	    score therefore lands in exactly one class; scores that fall between
	    ``moderate`` and ``severe`` (e.g. fractional values) count as moderate.

	    Args:
	        score: Score to classify.
	        threshold: Lowest score that is no longer normal.
	        mild: Highest score of the mild band (inclusive).
	        moderate: Highest score of the moderate band (inclusive).
	        severe: Lowest score of the severe band.

	    Returns:
	        0 (normal), 1 (mild), 2 (moderate) or 3 (severe). ``None`` is
	        returned when the score cannot be ordered against the cut-offs,
	        e.g. NaN for a missing value.
	    """
	    if score < threshold:
	        return 0  # Normal
	    if score <= mild:
	        return 1  # Mild
	    # Second test closes the gap between the last moderate score and the
	    # first severe one, which previously matched no branch at all.
	    if score <= moderate or score < severe:
	        return 2  # Moderate
	    if score >= severe:
	        return 3  # Severe
	    # Only reachable when every comparison above is False (NaN).
	    return None


	# Convert every record into a natural-language description plus its two
	# labels. The SAS/SDS classes are kept in their own columns and are
	# deliberately left out of the description text.
	new_descriptions = []
	sas_classes = []
	sds_classes = []
	# One pass over the rows feeds all three output columns.
	for _, row in data.iterrows():
	    gender = gender_dict[row["Gender"]]
	    education = education_levels[row["Education"]]
	    description = f"A patient of gender {gender} and educational background {education}, has hemoglobin {row['Hemoglobin']}g/L, total protein {row['Total_Protein']}g/L, albumin {row['Albumin']}g/L, HDL {row['HDL']}mmol/L, AST {row['AST']}U/L, direct bilirubin {row['Direct_Bilirubin']}μmol/L, sleeps {row['Sleep']} hours, and has an SF-36 score of {row['SF-36']}."
	    new_descriptions.append(description)
	    # Cut-offs are passed positionally as (threshold, mild, moderate, severe).
	    sas_classes.append(classify_sas_sds(row["SAS"], 50, 59, 69, 70))
	    sds_classes.append(classify_sas_sds(row["SDS"], 53, 62, 72, 73))

	# Build the output frame directly from the finished columns instead of
	# creating a placeholder description column and overwriting it afterwards.
	new_data = pd.DataFrame({
	    "Description": new_descriptions,
	    "SAS_Class": sas_classes,
	    "SDS_Class": sds_classes,
	})

	# Save the new DataFrame as a CSV file, without the index column.
	new_csv_path = 'processed_new_data.csv'
	new_data.to_csv(new_csv_path, index=False)

	print(new_csv_path)