Spaces:

ElvisWang111
/

AutoSTAT

Running

App Files Files Community

ElvisWang111 committed 21 days ago

Commit

495d32d

verified ·

1 Parent(s): f0c9413

Upload folder using huggingface_hub

Browse files

Files changed (16) hide show

.DS_Store +0 -0
.gitattributes +1 -0
app.py +31 -9
logo/.DS_Store +0 -0
logo/github.jpeg +3 -0
logo/logo_big.png +2 -2
prompt_engineer/planner.py +5 -2
prompt_engineer/sec1_call_llm.py +5 -0
prompt_engineer/sec2_call_llm.py +12 -0
prompt_engineer/sec3_call_llm.py +10 -0
prompt_engineer/sec4_call_llm.py +14 -47
prompt_engineer/sec5_call_llm.py +13 -51
workflow/dataloading/dataloading_render.py +1 -1
workflow/preference/pref_render.py +81 -0
workflow/preprocessing/preprocessing_core.py +1 -1
workflow/report/report_utils.py +1 -1

.DS_Store ADDED Viewed

Binary file (8.2 kB). View file

.gitattributes CHANGED Viewed

@@ -37,3 +37,4 @@ logo/logo_16_9.png filter=lfs diff=lfs merge=lfs -text
 logo/logo_big.png filter=lfs diff=lfs merge=lfs -text
 logo/logo_blue_wide.png filter=lfs diff=lfs merge=lfs -text
 logo/logo_wide.png filter=lfs diff=lfs merge=lfs -text

 logo/logo_big.png filter=lfs diff=lfs merge=lfs -text
 logo/logo_blue_wide.png filter=lfs diff=lfs merge=lfs -text
 logo/logo_wide.png filter=lfs diff=lfs merge=lfs -text
+logo/github.jpeg filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -52,6 +52,12 @@ def init_session_state():
         st.session_state.modeling_start_time = None
     if 'report_start_time' not in st.session_state:
         st.session_state.report_start_time = None
     if 'data_loading_agent' not in st.session_state:
         st.session_state.data_loading_agent = DataLoadingAgent(
@@ -123,11 +129,11 @@ def run_app():
             type="password",
             key="api_key_input",
         )
-        st.session_state.api_keys[selected] = api_key_input
         if st.button("💾 保存密钥", use_container_width=True, key="save_key"):
             # 保存在 utils/.streamlit/secrets.toml
-            # update_local_api_key(selected, api_key_input)
             st.session_state.api_keys[selected] = api_key_input
             st.success("已保存")
@@ -170,10 +176,22 @@ def run_app():
         if st.session_state.data_loading_agent.load_df() is not None:
             planner = st.session_state.planner_agent
-            if st.button("🚗 自动模式", use_container_width=True, key="self_driving"):
-                planner.self_driving(st.session_state.data_loading_agent.load_df())
-                st.session_state.auto_mode = True
-                st.rerun()
         st.image(
             "logo/logo_big.png",
@@ -181,13 +199,17 @@ def run_app():
         )
     # Define pages
     data_loading = st.Page(
         "workflow/dataloading/dataloading_render.py",
         title="📥 数据导入",
     )
     preprocessing = st.Page(
         "workflow/preprocessing/preprocessing_render.py",
-        title="⚙️ 数据预处理",
     )
     visualization = st.Page(
         "workflow/visualization/viz_render.py",
@@ -204,8 +226,8 @@ def run_app():
     # Navigation
     pg = st.navigation(
         {
-            "设置": [data_loading, preprocessing],
-            "功能": [visualization, coding_modeling, report],
         }
     )
     pg.run()

         st.session_state.modeling_start_time = None
     if 'report_start_time' not in st.session_state:
         st.session_state.report_start_time = None
+    if 'preference_select' not in st.session_state:
+        st.session_state.preference_select = None
+    if 'additional_preference' not in st.session_state:
+        st.session_state.additional_preference = None
+    if "from_auto" not in st.session_state:
+        st.session_state.from_auto = False
     if 'data_loading_agent' not in st.session_state:
         st.session_state.data_loading_agent = DataLoadingAgent(
             type="password",
             key="api_key_input",
         )
         if st.button("💾 保存密钥", use_container_width=True, key="save_key"):
             # 保存在 utils/.streamlit/secrets.toml
+            update_local_api_key(selected, api_key_input)
             st.session_state.api_keys[selected] = api_key_input
             st.success("已保存")
         if st.session_state.data_loading_agent.load_df() is not None:
             planner = st.session_state.planner_agent
+            if st.session_state.auto_mode is False:
+                if st.button("🚗 自动模式", use_container_width=True):
+                    st.session_state.auto_mode = True
+                    planner.self_driving(st.session_state.data_loading_agent.load_df())
+                    st.switch_page("workflow/dataloading/dataloading_render.py")
+                    st.rerun()
+            else:
+                if st.button("❌ 结束自动模式", use_container_width=True):
+                    st.session_state.auto_mode = False
+                    st.session_state.planner_agent = PlannerAgent(
+                    api_keys=st.session_state.api_keys,
+                    model_configs=MODEL_CONFIGS,
+                    model=st.session_state.selected_model
+                    )
+                    st.rerun()
         st.image(
             "logo/logo_big.png",
         )
     # Define pages
+    preference = st.Page(
+        "workflow/preference/pref_render.py",
+        title="⚙️ 偏好设置",
+    )
     data_loading = st.Page(
         "workflow/dataloading/dataloading_render.py",
         title="📥 数据导入",
     )
     preprocessing = st.Page(
         "workflow/preprocessing/preprocessing_render.py",
+        title="🛠️ 数据预处理",
     )
     visualization = st.Page(
         "workflow/visualization/viz_render.py",
     # Navigation
     pg = st.navigation(
         {
+            "功能": [data_loading, preprocessing, visualization, coding_modeling, report],
+            "设置": [preference]
         }
     )
     pg.run()

logo/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

logo/github.jpeg ADDED Viewed

Git LFS Details

SHA256: 9cebf5601817093d34da98b3bb505df23fd01c73ee60677561fefc33908f21f4
Pointer size: 131 Bytes
Size of remote file: 166 kB

logo/logo_big.png CHANGED Viewed

Git LFS Details

SHA256: 389ffb2d5eec47539b6aee2ef89d4949d3bfa2d94d16c2d7198bd7ef394beb59
Pointer size: 131 Bytes
Size of remote file: 326 kB

Git LFS Details

SHA256: 6151a260c6438af0daa904f055a1b29cb5b1fb683a5f4314d65b07f3f6dc6fe3
Pointer size: 131 Bytes
Size of remote file: 231 kB

prompt_engineer/planner.py CHANGED Viewed

@@ -33,8 +33,10 @@ class PlannerAgent(LLMClient):
             f"- 前 5 行样本：\n{df.head().to_dict(orient='list')}\n\n"
         )
-        if user_input:
-            prompt += f"用户的具体需求是：“{user_input}”。\n\n"
         prompt += """
         你需要在以下 5 个步骤中，对每个步骤分别判断是否应该开启（True / False）：
@@ -66,6 +68,7 @@ class PlannerAgent(LLMClient):
         print(plan_dict)
         self.loading_auto = bool(plan_dict.get("loading_auto", False))
         self.prep_auto = bool(plan_dict.get("prep_auto", False))
         self.vis_auto = bool(plan_dict.get("vis_auto", False))
         self.modeling_auto = bool(plan_dict.get("modeling_auto", False))

             f"- 前 5 行样本：\n{df.head().to_dict(orient='list')}\n\n"
         )
+        if st.session_state.preference_select:
+            prompt += f"以下是用户的分析偏好设置：{st.session_state.preference_select}”。\n\n"
+        if st.session_state.additional_preference:
+            prompt += f"用户提供了以下建模目的与特殊需求：{st.session_state.additional_preference}”。\n\n"
         prompt += """
         你需要在以下 5 个步骤中，对每个步骤分别判断是否应该开启（True / False）：
         print(plan_dict)
         self.loading_auto = bool(plan_dict.get("loading_auto", False))
+        self.loading_auto = True
         self.prep_auto = bool(plan_dict.get("prep_auto", False))
         self.vis_auto = bool(plan_dict.get("vis_auto", False))
         self.modeling_auto = bool(plan_dict.get("modeling_auto", False))

prompt_engineer/sec1_call_llm.py CHANGED Viewed

@@ -153,6 +153,11 @@ class DataLoadingAgent(LLMClient):
             - 重点突出数据结构、含义与潜在问题。
             """
         desc = self.call(prompt)
         return desc

             - 重点突出数据结构、含义与潜在问题。
             """
+        if st.session_state.preference_select:
+            prompt += f"以下是用户的分析偏好设置：{st.session_state.preference_select}”。\n\n"
+        if st.session_state.additional_preference:
+            prompt += f"用户提供了以下建模目的与特殊需求：{st.session_state.additional_preference}”。\n\n"
         desc = self.call(prompt)
         return desc

prompt_engineer/sec2_call_llm.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import numpy as np
 import pandas as pd
 from prompt_engineer.call_llm import LLMClient
@@ -186,6 +187,11 @@ class DataPreprocessAgent(LLMClient):
             输出应保持结构化与连贯性，避免重复说明。
             """
         suggestions = self.call(prompt)
         return suggestions
@@ -268,6 +274,12 @@ class DataPreprocessAgent(LLMClient):
         if self.refined_suggestions is not None:
             prompt += f"LLM返回的预处理建议：{self.refined_suggestions}"
         raw = self.call(prompt)
         return raw

 import numpy as np
 import pandas as pd
+import streamlit as st
 from prompt_engineer.call_llm import LLMClient
             输出应保持结构化与连贯性，避免重复说明。
             """
+        if st.session_state.preference_select:
+            prompt += f"以下是用户的分析偏好设置：{st.session_state.preference_select}”。\n\n"
+        if st.session_state.additional_preference:
+            prompt += f"用户提供了以下建模目的与特殊需求：{st.session_state.additional_preference}”。\n\n"
         suggestions = self.call(prompt)
         return suggestions
         if self.refined_suggestions is not None:
             prompt += f"LLM返回的预处理建议：{self.refined_suggestions}"
+        if st.session_state.preference_select:
+            prompt += f"以下是用户的分析偏好设置：{st.session_state.preference_select}”。\n\n"
+        if st.session_state.additional_preference:
+            prompt += f"用户提供了以下建模目的与特殊需求：{st.session_state.additional_preference}”。\n\n"
         raw = self.call(prompt)
         return raw

prompt_engineer/sec3_call_llm.py CHANGED Viewed

@@ -227,6 +227,11 @@ class VisualizationAgent(LLMClient):
             6. 禁止输出代码。
             """.strip()
         recommendations = self.call(prompt)
         return recommendations
@@ -545,6 +550,11 @@ class VisualizationAgent(LLMClient):
             else:
                 self.debug_num = 0
         raw = self.call(prompt)
         return raw

             6. 禁止输出代码。
             """.strip()
+        if st.session_state.preference_select:
+            prompt += f"以下是用户的分析偏好设置：{st.session_state.preference_select}”。\n\n"
+        if st.session_state.additional_preference:
+            prompt += f"用户提供了以下建模目的与特殊需求：{st.session_state.additional_preference}”。\n\n"
         recommendations = self.call(prompt)
         return recommendations
             else:
                 self.debug_num = 0
+        if st.session_state.preference_select:
+            prompt += f"以下是用户的分析偏好设置：{st.session_state.preference_select}”。\n\n"
+        if st.session_state.additional_preference:
+            prompt += f"用户提供了以下建模目的与特殊需求：{st.session_state.additional_preference}”。\n\n"
         raw = self.call(prompt)
         return raw

prompt_engineer/sec4_call_llm.py CHANGED Viewed

@@ -262,6 +262,11 @@ class ModelingCodingAgent(LLMClient):
             else:
                 self.debug_num = 0
         raw = self.call(prompt)
         return raw
@@ -291,6 +296,11 @@ class ModelingCodingAgent(LLMClient):
                 {result_json}
                 """.strip()
         raw = self.call(prompt)
         return raw
@@ -378,54 +388,11 @@ class ModelingCodingAgent(LLMClient):
             3. 保持语言专业、简洁，不输出代码。
             """
-        # # === 主 prompt 组装 ===
-        # prompt = f"""
-        # 你是一位资深的机器学习建模专家。
-        # 以下是用户的数据信息：
-        # {data_info}
-        # {memory_block}
-        # """.strip()
-        # # 若用户有明确建模目标
-        # if getattr(self, "target", None):
-        #     prompt += f"\n\n建模目标：{self.target}（务必满足，并请在回答中复述）"
-        # # 若用户额外输入了需求
-        # if user_input:
-        #     prompt += f"""\n\n用户的当前需求：{user_input}（务必满足！）
-        #     若用户的要求是局部更新，则保留先前内容，仅修改特定部分。"""
-        # # 若有之前生成的训练代码
-        # train_code = self.load_code()
-        # if train_code:
-        #     prompt += f"""
-        #     用户之前生成的训练代码：
-        #     {train_code}
-        #     请在理解该代码的基础上，提供 **1–2 条模型改进建议**，
-        #     可涉及但不限于：
-        #     - 模型结构调整
-        #     - 特征工程优化
-        #     - 模型替换（例如从树模型切换为深度学习模型）
-        #     - 超参数调整或正则化策略优化
-        #     """
-        # else:
-        #     prompt += """
-        #     请基于数据特征，推荐 2–3 个合适的模型，
-        #     并说明每个模型的适用场景和优劣分析。
-        #     """
-        # # 若存在以往建模结果
-        # modeling_result = self.load_modeling_result()
-        # if modeling_result:
-        #     prompt += f"\n\n用户之前的模型运行结果：\n{modeling_result}"
-        # === 调用 LLM ===
         raw = self.call(prompt)
         return raw

             else:
                 self.debug_num = 0
+        if st.session_state.preference_select:
+            prompt += f"以下是用户的分析偏好设置：{st.session_state.preference_select}”。\n\n"
+        if st.session_state.additional_preference:
+            prompt += f"用户提供了以下建模目的与特殊需求：{st.session_state.additional_preference}”。\n\n"
         raw = self.call(prompt)
         return raw
                 {result_json}
                 """.strip()
+        if st.session_state.preference_select:
+            prompt += f"以下是用户的分析偏好设置：{st.session_state.preference_select}”。\n\n"
+        if st.session_state.additional_preference:
+            prompt += f"用户提供了以下建模目的与特殊需求：{st.session_state.additional_preference}”。\n\n"
         raw = self.call(prompt)
         return raw
             3. 保持语言专业、简洁，不输出代码。
             """
+        if st.session_state.preference_select:
+            prompt += f"以下是用户的分析偏好设置：{st.session_state.preference_select}”。\n\n"
+        if st.session_state.additional_preference:
+            prompt += f"用户提供了以下建模目的与特殊需求：{st.session_state.additional_preference}”。\n\n"
         raw = self.call(prompt)
         return raw

prompt_engineer/sec5_call_llm.py CHANGED Viewed

@@ -354,6 +354,10 @@ class ReportAgent(LLMClient):
         {full_summary}
         """
         toc_response = self.call(prompt)
         return toc_response.strip()
@@ -408,6 +412,11 @@ class ReportAgent(LLMClient):
         1. 若章节间存在嵌套关系，优先分配给最具体的子章节（如 3.1.2 比 3.1 更优）。
         """
         toc_with_figs = self.call(prompt)
         return toc_with_figs.strip()
@@ -560,57 +569,10 @@ class ReportAgent(LLMClient):
         请严格在以上范围内撰写本章节正文。
         """
-        # prompt = f"""
-        # 你是一个专业的数据分析报告撰写助手。你需要基于我提供的参考信息进行深入分析，生成结构严谨、逻辑清晰的专业报告章节。
-        # 当前需要撰写的章节信息（完整四元组，依次为标题、层级、内容大纲、图编号列表）：
-        # {t}
-        # 报告目录结构（包含所有章节的四元组信息）：
-        # {toc}
-        # 可参考的分析内容如下：
-        # {selected_full_contents}
-        # 此前已生成的章节内容如下（用于保持整体风格一致，并避免内容重复）：
-        # {history_content}
-        # 请根据章节标题、层级、内容大纲、参考信息和图编号列表，生成该章节的完整正文。
-        # 正文详细程度有三种模式：
-        # - 简要：只包含核心结论与关键点，语言精炼；
-        # - 标准：包含主要分析逻辑、步骤与结果；
-        # - 详细：展开完整分析、方法论、推理过程与补充说明。
-        # 用户当前选择的模式是：{self.outline_length}
-        # 写作要求：
-        # 1. **核心任务**：仅撰写当前章节 **“{t[0]}”** 的正文内容，不得涉及其他章节。
-        # 2. **图表引用**：正文中引用的图表必须严格对应本章节的图编号（即 {t[3]}），**不得使用或编造其他编号**。
-        # 3. **语言规范**：
-        # - 语言应专业、准确、逻辑严谨；
-        # - 叙述风格应正式、学术化；
-        # - 禁止使用口语化或主观色彩表达。
-        # 4. **输出要求**：
-        # - 仅输出章节正文内容，不得输出 Markdown；
-        # - 不得输出任何标题，如：1，一，（1）等；
-        # - 禁止加粗、斜体、表情符号或其他符号修饰；
-        # - 不得出现非正文短语，如 “我认为”、“请继续”、“感谢阅读”、“---” 等；
-        # 5. **图片规范**：
-        # - 图片应独立成行，不得嵌入句子内部；
-        # - 图片可放置在段落的开头、结尾，或自然停顿处（如句号、分号后），以保持语义连贯；
-        # - 使用占位符格式 [FIG:index] 标记图片位置，其中 index 为对应图片的编号；
-        # - 在每个 [FIG:index] 占位符后，需紧跟一行图片标题，格式如下：
-        #       图：图片标题（简要说明图片内容及分析要点）
-        # - 图片插入位置应依据其语义和上下文逻辑确定：
-        #       · 若图片用于引出分析，应放在段落开头；
-        #       · 若用于支撑论述，应放在对应描述句之后；
-        #       · 若总结结果或展示对比，应放在段落结尾；
-        # - 请务必确保图片位置与文字逻辑匹配，使图片与正文形成自然的论证衔接；
-        # - 请不要删除、合并或重排序图片编号，系统将在后续自动替换为真实图像。
-        # 请直接输出该章节的正文内容，不要有任何其他文字。
-        # """
         content = self.call(prompt)

         {full_summary}
         """
+        if st.session_state.preference_select:
+            prompt += f"以下是用户的分析偏好设置：{st.session_state.preference_select}”。\n\n"
+        if st.session_state.additional_preference:
+            prompt += f"用户提供了以下建模目的与特殊需求：{st.session_state.additional_preference}”。\n\n"
         toc_response = self.call(prompt)
         return toc_response.strip()
         1. 若章节间存在嵌套关系，优先分配给最具体的子章节（如 3.1.2 比 3.1 更优）。
         """
+        if st.session_state.preference_select:
+            prompt += f"以下是用户的分析偏好设置：{st.session_state.preference_select}”。\n\n"
+        if st.session_state.additional_preference:
+            prompt += f"用户提供了以下建模目的与特殊需求：{st.session_state.additional_preference}”。\n\n"
         toc_with_figs = self.call(prompt)
         return toc_with_figs.strip()
         请严格在以上范围内撰写本章节正文。
         """
+        if st.session_state.preference_select:
+            prompt += f"以下是用户的分析偏好设置：{st.session_state.preference_select}”。\n\n"
+        if st.session_state.additional_preference:
+            prompt += f"用户提供了以下建模目的与特殊需求：{st.session_state.additional_preference}”。\n\n"
         content = self.call(prompt)

workflow/dataloading/dataloading_render.py CHANGED Viewed

@@ -184,7 +184,7 @@ if __name__ == "__main__":
     auto = planner.loading_auto
     if st.session_state.auto_mode == True:
-        if (agent.finish_auto_task == True and planner.switched_prep == False) or planner.prep_auto == False:
             planner.finish_loading_auto()
             st.switch_page("workflow/preprocessing/preprocessing_render.py")

     auto = planner.loading_auto
     if st.session_state.auto_mode == True:
+        if (agent.finish_auto_task == True and planner.switched_prep == False) or planner.loading_auto == False:
             planner.finish_loading_auto()
             st.switch_page("workflow/preprocessing/preprocessing_render.py")

workflow/preference/pref_render.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import streamlit as st
+def preferences_select():
+    """Render the preference form and store the user's choices in session state.
+
+    The free-text requirements are written to
+    ``st.session_state.additional_preference`` on every run. The six radio
+    choices are written to ``st.session_state.preference_select`` only when
+    the save button is clicked.
+
+    Returns:
+        The preferences dict when the save button was clicked during this
+        run, otherwise ``None``.
+    """
+    # Streamlit drops a widget's state when the widget is not rendered (for
+    # example while another page is shown). Without seeding, coming back to
+    # this page would render an empty text area and the assignment below
+    # would overwrite the previously entered requirements with "".
+    # Seeding the widget's own key before it is instantiated restores the
+    # stored text while keeping the widget's parameters constant.
+    requirements_key = "additional_preference_input"
+    if requirements_key not in st.session_state:
+        st.session_state[requirements_key] = (
+            st.session_state.get("additional_preference") or ""
+        )
+    modeling_requirements = st.text_area(
+        "请描述你的数据分析目标与需求",
+        key=requirements_key,
+        placeholder="例如：请帮我对数据进行可视化", height=200
+    )
+    st.session_state.additional_preference = modeling_requirements
+    # First row of single-choice questions.
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        report_style = st.radio(
+            "1. 报告风格",
+            ["简洁直观", "适中平衡", "深度技术型"],
+            index=1,
+        )
+    with col2:
+        analysis_type = st.radio(
+            "2. 分析方向偏好",
+            ["商业分析", "学术分析", "工程/产品分析"],
+        )
+    with col3:
+        model_pref = st.radio(
+            "3. 模型偏好",
+            ["可解释性强", "预测性能最优", "训练时间短"],
+            index=0,
+        )
+    # Second row of single-choice questions.
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        missing_pref = st.radio(
+            "4. 缺失值处理方式",
+            ["简单填补", "频率填补", "高级填补（KNN/MICE）"],
+        )
+    with col2:
+        lang_style = st.radio(
+            "5. 报告语言风格",
+            ["通俗易懂", "商业风", "学术论文风"],
+        )
+    with col3:
+        feature_pref = st.radio(
+            "6. 特征工程偏好",
+            ["少量关键特征", "大量候选特征", "只做基础处理"],
+        )
+    preferences = None
+    # The radio selections are only committed to session state on an explicit
+    # save, so callers see the last saved dict until the button is clicked.
+    if st.button("▶️ 保存偏好设置"):
+        preferences = {
+            "报告风格": report_style,
+            "模型偏好": model_pref,
+            "缺失值处理方式": missing_pref,
+            "特征工程偏好": feature_pref,
+            "报告语言风格": lang_style,
+            "分析方向偏好": analysis_type,
+        }
+        st.success("✅ 偏好设置已保存！")
+        st.session_state.preference_select = preferences
+    return preferences
+# Page entry point: builds the preference page when this file is executed as
+# the main script.
+if __name__ == "__main__":
+    st.title("偏好设置")
+    st.markdown("---")
+    # Two columns are created but only the first is used, so the form
+    # occupies one column of the page.
+    c = st.columns(2)
+    # The second positional argument (True) is passed to the expander;
+    # presumably its `expanded` flag, i.e. open by default.
+    with c[0].expander('偏好设置', True):
+        preferences_select()

workflow/preprocessing/preprocessing_core.py CHANGED Viewed

@@ -52,7 +52,7 @@ def prep_meta_execution(agent, code, df, auto=False):
                 with st.spinner("正在运行程序..."):
                     exec(code, exec_ns)
             except Exception as exc:
-                st.error(f"已保存报错，请重新调用llm生成代码debug")
                 st.text(traceback.format_exc())
                 agent.save_error(traceback.format_exc())
                 prep_code_gen(agent, debug=True)

                 with st.spinner("正在运行程序..."):
                     exec(code, exec_ns)
             except Exception as exc:
+                st.error(f"已保存报错，正在重新调用llm生成代码debug")
                 st.text(traceback.format_exc())
                 agent.save_error(traceback.format_exc())
                 prep_code_gen(agent, debug=True)

workflow/report/report_utils.py CHANGED Viewed

@@ -2,7 +2,7 @@ import re
 import base64
 import streamlit as st
-# from playwright.sync_api import sync_playwright
 def html_to_pdf_bytes_playwright(html: str) -> bytes:

 import base64
 import streamlit as st
+from playwright.sync_api import sync_playwright
 def html_to_pdf_bytes_playwright(html: str) -> bytes: