Spaces:
Running
Running
import gradio as gr | |
import pandas as pd | |
import matplotlib.font_manager as font_manager | |
import matplotlib.pyplot as plt | |
import warnings | |
# Suppress every UserWarning for the lifetime of the process.
warnings.filterwarnings('ignore', category=UserWarning)

# Register the SimHei font file with matplotlib; the plots below use Chinese
# labels, which presumably need this font to render.
font_path = './SimHei.ttf'  # replace with the actual path of the font file
font_manager.fontManager.addfont(font_path)
plt.rcParams.update({
    # Family name of the font registered above.
    'font.family': 'SimHei',
    # Original note: "make characters display normally" (per matplotlib, this
    # switches minus signs from the Unicode glyph to the ASCII hyphen).
    'axes.unicode_minus': False,
})
def process_file(file):
    """Load the uploaded table and reveal the column-selection controls.

    Args:
        file: Uploaded file object; only its ``name`` attribute (the path of
            the file on disk) is read.

    Returns:
        A 5-tuple, in the order of the outputs wired to the upload event:
        an update for the feature dropdown (choices are every column except
        the last), an update for the label dropdown (its only choice is the
        last column, preselected), a visibility update, the first five rows
        of the table, and another visibility update.

    Raises:
        gr.Error: If the file extension is not CSV/XLS/XLSX or the table has
            no columns. Raising (instead of returning a bare string) keeps
            the handler from handing one value to five output components.
    """
    path = file.name
    # Compare extensions case-insensitively so "DATA.CSV" is accepted too.
    lowered = path.lower()
    if lowered.endswith('.csv'):
        df = pd.read_csv(path)
    elif lowered.endswith(('.xlsx', '.xls')):
        df = pd.read_excel(path)
    else:
        raise gr.Error("不支持的数据文件格式。")

    columns = df.columns.tolist()
    if not columns:
        # Without this guard, columns[-1] below raises IndexError.
        raise gr.Error("数据文件中没有可用的列。")

    feature_columns = columns[:-1]
    label_column = columns[-1]

    # Return the preview rows, refresh the dropdown choices and make the
    # remaining controls visible.
    return (
        gr.update(choices=feature_columns, visible=True),
        gr.update(choices=[label_column], value=label_column, visible=True),
        gr.update(visible=True),
        df.head(),
        gr.update(visible=True),
    )
def update_choice_radio(choice_feature_column):
    """Relabel and show the "is this column continuous?" radio control.

    Args:
        choice_feature_column: The currently selected feature column label,
            or ``None`` when nothing is selected.

    Returns:
        A Gradio update. With a selection, the label embeds the column name
        and the control becomes visible; with no selection, the control is
        hidden because there is no column to ask about.
    """
    if choice_feature_column is None:
        return gr.update(visible=False)
    # An f-string (rather than "+") also accepts non-string column labels,
    # e.g. integer headers, which string concatenation would reject.
    return gr.update(label=f"“{choice_feature_column}”是否为连续值", visible=True)
def update_slider(choice):
    """Return an update that toggles visibility based on the radio answer.

    The target becomes visible exactly when ``choice`` equals "是" and is
    hidden for any other value.
    """
    is_continuous = choice == "是"
    return gr.update(visible=is_continuous)
def generate_output(file, column1, column2, choice, bins):
    """Plot how the values of ``column2`` are distributed across ``column1``.

    Args:
        file: Uploaded file object; only its ``name`` attribute (a path) is
            read.
        column1: Label of the feature column (x axis).
        column2: Label of the label column (one bar series per value).
        choice: "是" when ``column1`` is continuous and must be binned; any
            other value uses the column as-is.
        bins: Number of quantile groups for a continuous ``column1``; it also
            sets the figure width (``bins * 2`` inches).

    Returns:
        A 4-tuple, in the order of the outputs wired to the button click:
        the first five rows of the table, a visibility update, the path of
        the saved chart image, and another visibility update.

    Raises:
        gr.Error: If no file is present, the extension is not CSV/XLS/XLSX,
            or a selected column is missing from the table. Raising (instead
            of returning a bare string) keeps the handler from handing one
            value to four output components.
    """
    if file is None:
        raise gr.Error("请先上传数据文件。")

    path = file.name
    # Compare extensions case-insensitively so "DATA.CSV" is accepted too.
    lowered = path.lower()
    if lowered.endswith('.csv'):
        df = pd.read_csv(path)
    elif lowered.endswith(('.xlsx', '.xls')):
        df = pd.read_excel(path)
    else:
        raise gr.Error("不支持的数据文件格式。")

    # Covers both an empty selection (None) and a stale one left over from a
    # previously uploaded file.
    if column1 not in df.columns or column2 not in df.columns:
        raise gr.Error("请先选择有效的特征列和标签列。")

    data_x = df[column1]
    data_y = df[column2]
    bins = int(bins)

    if choice == "是":
        # Continuous feature: split into quantile groups and label each group
        # with its interval bounds.
        data_x = pd.qcut(data_x, q=bins, duplicates='drop')
        data_x = data_x.apply(lambda x: f'{x.left:.2f} - {x.right:.2f}')
    # Discrete feature: the raw values are used as the groups.

    # Count how often each label value occurs within each feature group.
    counts = pd.crosstab(data_x, data_y)

    # Draw on an explicitly created Axes. DataFrame.plot() without ``ax``
    # opens its own figure, so a preceding plt.figure(figsize=...) would be
    # ignored and left behind as an empty figure.
    fig, ax = plt.subplots(figsize=(bins * 2, 10))
    try:
        # Grouped bar chart with slightly rotated x tick labels.
        counts.plot(kind='bar', ax=ax, rot=15)
        ax.set_xlabel(column1, fontsize=12)
        ax.set_ylabel(column2, fontsize=12)
        ax.set_title(f'{column1}与{column2}的关系', fontsize=14)
        image_path = 'output.png'
        fig.savefig(image_path)
    finally:
        # Always release the figure; otherwise pyplot keeps every figure
        # alive and memory grows with each click.
        plt.close(fig)

    return df.head(), gr.update(visible=True), image_path, gr.update(visible=True)
# Build the UI: input controls in the left column, table preview and chart in
# the right column. Everything except the file picker starts hidden and is
# revealed step by step by the event handlers.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            file_input = gr.File(
                label="上传表格文件(支持CSV、XLS、XLSX等格式)",
                # Extensions are given with a leading dot, the form the
                # Gradio documentation uses for file_types.
                file_types=[".csv", ".xls", ".xlsx"],
            )
            col1_dropdown = gr.Dropdown(label="请选择特征列", visible=False)
            col2_dropdown = gr.Dropdown(label="选择标签列", visible=False)
            choice_radio = gr.Radio(["是", "否"], label="特征列是否为连续值", visible=False)
            slider = gr.Slider(minimum=3, maximum=7, step=1, label="选择将特征列分组的分组数", visible=False, value=4)
            submit_button = gr.Button("查看结果", visible=False)
        with gr.Column():
            df_display = gr.Dataframe(visible=False)
            output_image = gr.Image(visible=False)

    # After a file is uploaded, call process_file. df_display is listed twice
    # because the handler returns a visibility update and the preview rows as
    # separate values.
    file_input.upload(
        process_file,
        inputs=file_input,
        outputs=[col1_dropdown, col2_dropdown, df_display, df_display, submit_button],
    )
    # When the feature column changes, relabel and show the radio control.
    col1_dropdown.change(update_choice_radio, inputs=col1_dropdown, outputs=choice_radio)
    # When the radio answer changes, show or hide the slider.
    choice_radio.change(update_slider, inputs=choice_radio, outputs=slider)
    # When the submit button is clicked, call generate_output. df_display and
    # output_image are each listed twice for the same value/visibility split.
    submit_button.click(
        generate_output,
        inputs=[file_input, col1_dropdown, col2_dropdown, choice_radio, slider],
        outputs=[df_display, df_display, output_image, output_image],
    )

demo.launch(share=True)