Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- .DS_Store +0 -0
- .gitattributes +1 -0
- app.py +31 -9
- logo/.DS_Store +0 -0
- logo/github.jpeg +3 -0
- logo/logo_big.png +2 -2
- prompt_engineer/planner.py +5 -2
- prompt_engineer/sec1_call_llm.py +5 -0
- prompt_engineer/sec2_call_llm.py +12 -0
- prompt_engineer/sec3_call_llm.py +10 -0
- prompt_engineer/sec4_call_llm.py +14 -47
- prompt_engineer/sec5_call_llm.py +13 -51
- workflow/dataloading/dataloading_render.py +1 -1
- workflow/preference/pref_render.py +81 -0
- workflow/preprocessing/preprocessing_core.py +1 -1
- workflow/report/report_utils.py +1 -1
.DS_Store
ADDED
|
Binary file (8.2 kB). View file
|
|
|
.gitattributes
CHANGED
|
@@ -37,3 +37,4 @@ logo/logo_16_9.png filter=lfs diff=lfs merge=lfs -text
|
|
| 37 |
logo/logo_big.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
logo/logo_blue_wide.png filter=lfs diff=lfs merge=lfs -text
|
| 39 |
logo/logo_wide.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 37 |
logo/logo_big.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
logo/logo_blue_wide.png filter=lfs diff=lfs merge=lfs -text
|
| 39 |
logo/logo_wide.png filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
logo/github.jpeg filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
|
@@ -52,6 +52,12 @@ def init_session_state():
|
|
| 52 |
st.session_state.modeling_start_time = None
|
| 53 |
if 'report_start_time' not in st.session_state:
|
| 54 |
st.session_state.report_start_time = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
if 'data_loading_agent' not in st.session_state:
|
| 57 |
st.session_state.data_loading_agent = DataLoadingAgent(
|
|
@@ -123,11 +129,11 @@ def run_app():
|
|
| 123 |
type="password",
|
| 124 |
key="api_key_input",
|
| 125 |
)
|
| 126 |
-
|
| 127 |
|
| 128 |
if st.button("💾 保存密钥", use_container_width=True, key="save_key"):
|
| 129 |
# 保存在 utils/.streamlit/secrets.toml
|
| 130 |
-
|
| 131 |
|
| 132 |
st.session_state.api_keys[selected] = api_key_input
|
| 133 |
st.success("已保存")
|
|
@@ -170,10 +176,22 @@ def run_app():
|
|
| 170 |
|
| 171 |
if st.session_state.data_loading_agent.load_df() is not None:
|
| 172 |
planner = st.session_state.planner_agent
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
st.
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
st.image(
|
| 179 |
"logo/logo_big.png",
|
|
@@ -181,13 +199,17 @@ def run_app():
|
|
| 181 |
)
|
| 182 |
|
| 183 |
# Define pages
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
data_loading = st.Page(
|
| 185 |
"workflow/dataloading/dataloading_render.py",
|
| 186 |
title="📥 数据导入",
|
| 187 |
)
|
| 188 |
preprocessing = st.Page(
|
| 189 |
"workflow/preprocessing/preprocessing_render.py",
|
| 190 |
-
title="
|
| 191 |
)
|
| 192 |
visualization = st.Page(
|
| 193 |
"workflow/visualization/viz_render.py",
|
|
@@ -204,8 +226,8 @@ def run_app():
|
|
| 204 |
# Navigation
|
| 205 |
pg = st.navigation(
|
| 206 |
{
|
| 207 |
-
"
|
| 208 |
-
"
|
| 209 |
}
|
| 210 |
)
|
| 211 |
pg.run()
|
|
|
|
| 52 |
st.session_state.modeling_start_time = None
|
| 53 |
if 'report_start_time' not in st.session_state:
|
| 54 |
st.session_state.report_start_time = None
|
| 55 |
+
if 'preference_select' not in st.session_state:
|
| 56 |
+
st.session_state.preference_select = None
|
| 57 |
+
if 'additional_preference' not in st.session_state:
|
| 58 |
+
st.session_state.additional_preference = None
|
| 59 |
+
if "from_auto" not in st.session_state:
|
| 60 |
+
st.session_state.from_auto = False
|
| 61 |
|
| 62 |
if 'data_loading_agent' not in st.session_state:
|
| 63 |
st.session_state.data_loading_agent = DataLoadingAgent(
|
|
|
|
| 129 |
type="password",
|
| 130 |
key="api_key_input",
|
| 131 |
)
|
| 132 |
+
|
| 133 |
|
| 134 |
if st.button("💾 保存密钥", use_container_width=True, key="save_key"):
|
| 135 |
# 保存在 utils/.streamlit/secrets.toml
|
| 136 |
+
update_local_api_key(selected, api_key_input)
|
| 137 |
|
| 138 |
st.session_state.api_keys[selected] = api_key_input
|
| 139 |
st.success("已保存")
|
|
|
|
| 176 |
|
| 177 |
if st.session_state.data_loading_agent.load_df() is not None:
|
| 178 |
planner = st.session_state.planner_agent
|
| 179 |
+
|
| 180 |
+
if st.session_state.auto_mode is False:
|
| 181 |
+
if st.button("🚗 自动模式", use_container_width=True):
|
| 182 |
+
st.session_state.auto_mode = True
|
| 183 |
+
planner.self_driving(st.session_state.data_loading_agent.load_df())
|
| 184 |
+
st.switch_page("workflow/dataloading/dataloading_render.py")
|
| 185 |
+
st.rerun()
|
| 186 |
+
else:
|
| 187 |
+
if st.button("❌ 结束自动模式", use_container_width=True):
|
| 188 |
+
st.session_state.auto_mode = False
|
| 189 |
+
st.session_state.planner_agent = PlannerAgent(
|
| 190 |
+
api_keys=st.session_state.api_keys,
|
| 191 |
+
model_configs=MODEL_CONFIGS,
|
| 192 |
+
model=st.session_state.selected_model
|
| 193 |
+
)
|
| 194 |
+
st.rerun()
|
| 195 |
|
| 196 |
st.image(
|
| 197 |
"logo/logo_big.png",
|
|
|
|
| 199 |
)
|
| 200 |
|
| 201 |
# Define pages
|
| 202 |
+
preference = st.Page(
|
| 203 |
+
"workflow/preference/pref_render.py",
|
| 204 |
+
title="⚙️ 偏好设置",
|
| 205 |
+
)
|
| 206 |
data_loading = st.Page(
|
| 207 |
"workflow/dataloading/dataloading_render.py",
|
| 208 |
title="📥 数据导入",
|
| 209 |
)
|
| 210 |
preprocessing = st.Page(
|
| 211 |
"workflow/preprocessing/preprocessing_render.py",
|
| 212 |
+
title="🛠️ 数据预处理",
|
| 213 |
)
|
| 214 |
visualization = st.Page(
|
| 215 |
"workflow/visualization/viz_render.py",
|
|
|
|
| 226 |
# Navigation
|
| 227 |
pg = st.navigation(
|
| 228 |
{
|
| 229 |
+
"功能": [data_loading, preprocessing, visualization, coding_modeling, report],
|
| 230 |
+
"设置": [preference]
|
| 231 |
}
|
| 232 |
)
|
| 233 |
pg.run()
|
logo/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
logo/github.jpeg
ADDED
|
Git LFS Details
|
logo/logo_big.png
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
prompt_engineer/planner.py
CHANGED
|
@@ -33,8 +33,10 @@ class PlannerAgent(LLMClient):
|
|
| 33 |
f"- 前 5 行样本:\n{df.head().to_dict(orient='list')}\n\n"
|
| 34 |
)
|
| 35 |
|
| 36 |
-
if
|
| 37 |
-
prompt += f"
|
|
|
|
|
|
|
| 38 |
|
| 39 |
prompt += """
|
| 40 |
你需要在以下 5 个步骤中,对每个步骤分别判断是否应该开启(True / False):
|
|
@@ -66,6 +68,7 @@ class PlannerAgent(LLMClient):
|
|
| 66 |
|
| 67 |
print(plan_dict)
|
| 68 |
self.loading_auto = bool(plan_dict.get("loading_auto", False))
|
|
|
|
| 69 |
self.prep_auto = bool(plan_dict.get("prep_auto", False))
|
| 70 |
self.vis_auto = bool(plan_dict.get("vis_auto", False))
|
| 71 |
self.modeling_auto = bool(plan_dict.get("modeling_auto", False))
|
|
|
|
| 33 |
f"- 前 5 行样本:\n{df.head().to_dict(orient='list')}\n\n"
|
| 34 |
)
|
| 35 |
|
| 36 |
+
if st.session_state.preference_select:
|
| 37 |
+
prompt += f"以下是用户的分析偏好设置:{st.session_state.preference_select}”。\n\n"
|
| 38 |
+
if st.session_state.additional_preference:
|
| 39 |
+
prompt += f"用户提供了以下建模目的与特殊需求:{st.session_state.additional_preference}”。\n\n"
|
| 40 |
|
| 41 |
prompt += """
|
| 42 |
你需要在以下 5 个步骤中,对每个步骤分别判断是否应该开启(True / False):
|
|
|
|
| 68 |
|
| 69 |
print(plan_dict)
|
| 70 |
self.loading_auto = bool(plan_dict.get("loading_auto", False))
|
| 71 |
+
self.loading_auto = True
|
| 72 |
self.prep_auto = bool(plan_dict.get("prep_auto", False))
|
| 73 |
self.vis_auto = bool(plan_dict.get("vis_auto", False))
|
| 74 |
self.modeling_auto = bool(plan_dict.get("modeling_auto", False))
|
prompt_engineer/sec1_call_llm.py
CHANGED
|
@@ -153,6 +153,11 @@ class DataLoadingAgent(LLMClient):
|
|
| 153 |
- 重点突出数据结构、含义与潜在问题。
|
| 154 |
"""
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
desc = self.call(prompt)
|
| 157 |
|
| 158 |
return desc
|
|
|
|
| 153 |
- 重点突出数据结构、含义与潜在问题。
|
| 154 |
"""
|
| 155 |
|
| 156 |
+
if st.session_state.preference_select:
|
| 157 |
+
prompt += f"以下是用户的分析偏好设置:{st.session_state.preference_select}”。\n\n"
|
| 158 |
+
if st.session_state.additional_preference:
|
| 159 |
+
prompt += f"用户提供了以下建模目的与特殊需求:{st.session_state.additional_preference}”。\n\n"
|
| 160 |
+
|
| 161 |
desc = self.call(prompt)
|
| 162 |
|
| 163 |
return desc
|
prompt_engineer/sec2_call_llm.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import numpy as np
|
| 2 |
import pandas as pd
|
|
|
|
| 3 |
|
| 4 |
from prompt_engineer.call_llm import LLMClient
|
| 5 |
|
|
@@ -186,6 +187,11 @@ class DataPreprocessAgent(LLMClient):
|
|
| 186 |
输出应保持结构化与连贯性,避免重复说明。
|
| 187 |
"""
|
| 188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
suggestions = self.call(prompt)
|
| 190 |
|
| 191 |
return suggestions
|
|
@@ -268,6 +274,12 @@ class DataPreprocessAgent(LLMClient):
|
|
| 268 |
if self.refined_suggestions is not None:
|
| 269 |
prompt += f"LLM返回的预处理建议:{self.refined_suggestions}"
|
| 270 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
raw = self.call(prompt)
|
| 272 |
return raw
|
| 273 |
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
import pandas as pd
|
| 3 |
+
import streamlit as st
|
| 4 |
|
| 5 |
from prompt_engineer.call_llm import LLMClient
|
| 6 |
|
|
|
|
| 187 |
输出应保持结构化与连贯性,避免重复说明。
|
| 188 |
"""
|
| 189 |
|
| 190 |
+
if st.session_state.preference_select:
|
| 191 |
+
prompt += f"以下是用户的分析偏好设置:{st.session_state.preference_select}”。\n\n"
|
| 192 |
+
if st.session_state.additional_preference:
|
| 193 |
+
prompt += f"用户提供了以下建模目的与特殊需求:{st.session_state.additional_preference}”。\n\n"
|
| 194 |
+
|
| 195 |
suggestions = self.call(prompt)
|
| 196 |
|
| 197 |
return suggestions
|
|
|
|
| 274 |
if self.refined_suggestions is not None:
|
| 275 |
prompt += f"LLM返回的预处理建议:{self.refined_suggestions}"
|
| 276 |
|
| 277 |
+
if st.session_state.preference_select:
|
| 278 |
+
prompt += f"以下是用户的分析偏好设置:{st.session_state.preference_select}”。\n\n"
|
| 279 |
+
|
| 280 |
+
if st.session_state.additional_preference:
|
| 281 |
+
prompt += f"用户提供了以下建模目的与特殊需求:{st.session_state.additional_preference}”。\n\n"
|
| 282 |
+
|
| 283 |
raw = self.call(prompt)
|
| 284 |
return raw
|
| 285 |
|
prompt_engineer/sec3_call_llm.py
CHANGED
|
@@ -227,6 +227,11 @@ class VisualizationAgent(LLMClient):
|
|
| 227 |
6. 禁止输出代码。
|
| 228 |
""".strip()
|
| 229 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
recommendations = self.call(prompt)
|
| 231 |
return recommendations
|
| 232 |
|
|
@@ -545,6 +550,11 @@ class VisualizationAgent(LLMClient):
|
|
| 545 |
else:
|
| 546 |
self.debug_num = 0
|
| 547 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 548 |
raw = self.call(prompt)
|
| 549 |
|
| 550 |
return raw
|
|
|
|
| 227 |
6. 禁止输出代码。
|
| 228 |
""".strip()
|
| 229 |
|
| 230 |
+
if st.session_state.preference_select:
|
| 231 |
+
prompt += f"以下是用户的分析偏好设置:{st.session_state.preference_select}”。\n\n"
|
| 232 |
+
if st.session_state.additional_preference:
|
| 233 |
+
prompt += f"用户提供了以下建模目的与特殊需求:{st.session_state.additional_preference}”。\n\n"
|
| 234 |
+
|
| 235 |
recommendations = self.call(prompt)
|
| 236 |
return recommendations
|
| 237 |
|
|
|
|
| 550 |
else:
|
| 551 |
self.debug_num = 0
|
| 552 |
|
| 553 |
+
if st.session_state.preference_select:
|
| 554 |
+
prompt += f"以下是用户的分析偏好设置:{st.session_state.preference_select}”。\n\n"
|
| 555 |
+
if st.session_state.additional_preference:
|
| 556 |
+
prompt += f"用户提供了以下建模目的与特殊需求:{st.session_state.additional_preference}”。\n\n"
|
| 557 |
+
|
| 558 |
raw = self.call(prompt)
|
| 559 |
|
| 560 |
return raw
|
prompt_engineer/sec4_call_llm.py
CHANGED
|
@@ -262,6 +262,11 @@ class ModelingCodingAgent(LLMClient):
|
|
| 262 |
else:
|
| 263 |
self.debug_num = 0
|
| 264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
raw = self.call(prompt)
|
| 266 |
|
| 267 |
return raw
|
|
@@ -291,6 +296,11 @@ class ModelingCodingAgent(LLMClient):
|
|
| 291 |
{result_json}
|
| 292 |
""".strip()
|
| 293 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
raw = self.call(prompt)
|
| 295 |
|
| 296 |
return raw
|
|
@@ -378,54 +388,11 @@ class ModelingCodingAgent(LLMClient):
|
|
| 378 |
3. 保持语言专业、简洁,不输出代码。
|
| 379 |
"""
|
| 380 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 381 |
|
| 382 |
-
# # === 主 prompt 组装 ===
|
| 383 |
-
# prompt = f"""
|
| 384 |
-
# 你是一位资深的机器学习建模专家。
|
| 385 |
-
|
| 386 |
-
# 以下是用户的数据信息:
|
| 387 |
-
# {data_info}
|
| 388 |
-
|
| 389 |
-
# {memory_block}
|
| 390 |
-
# """.strip()
|
| 391 |
-
|
| 392 |
-
# # 若用户有明确建模目标
|
| 393 |
-
# if getattr(self, "target", None):
|
| 394 |
-
# prompt += f"\n\n建模目标:{self.target}(务必满足,并请在回答中复述)"
|
| 395 |
-
|
| 396 |
-
# # 若用户额外输入了需求
|
| 397 |
-
# if user_input:
|
| 398 |
-
# prompt += f"""\n\n用户的当前需求:{user_input}(务必满足!)
|
| 399 |
-
# 若用户的要求是局部更新,则保留先前内容,仅修改特定部分。"""
|
| 400 |
-
|
| 401 |
-
# # 若有之前生成的训练代码
|
| 402 |
-
# train_code = self.load_code()
|
| 403 |
-
# if train_code:
|
| 404 |
-
# prompt += f"""
|
| 405 |
-
|
| 406 |
-
# 用户之前生成的训练代码:
|
| 407 |
-
# {train_code}
|
| 408 |
-
|
| 409 |
-
# 请在理解该代码的基础上,提供 **1–2 条模型改进建议**,
|
| 410 |
-
# 可涉及但不限于:
|
| 411 |
-
# - 模型结构调整
|
| 412 |
-
# - 特征工程优化
|
| 413 |
-
# - 模型替换(例如从树模型切换为深度学习模型)
|
| 414 |
-
# - 超参数调整或正则化策略优化
|
| 415 |
-
# """
|
| 416 |
-
# else:
|
| 417 |
-
# prompt += """
|
| 418 |
-
|
| 419 |
-
# 请基于数据特征,推荐 2–3 个合适的模型,
|
| 420 |
-
# 并说明每个模型的适用场景和优劣分析。
|
| 421 |
-
# """
|
| 422 |
-
|
| 423 |
-
# # 若存在以往建模结果
|
| 424 |
-
# modeling_result = self.load_modeling_result()
|
| 425 |
-
# if modeling_result:
|
| 426 |
-
# prompt += f"\n\n用户之前的模型运行结果:\n{modeling_result}"
|
| 427 |
-
|
| 428 |
-
# === 调用 LLM ===
|
| 429 |
raw = self.call(prompt)
|
| 430 |
return raw
|
| 431 |
|
|
|
|
| 262 |
else:
|
| 263 |
self.debug_num = 0
|
| 264 |
|
| 265 |
+
if st.session_state.preference_select:
|
| 266 |
+
prompt += f"以下是用户的分析偏好设置:{st.session_state.preference_select}”。\n\n"
|
| 267 |
+
if st.session_state.additional_preference:
|
| 268 |
+
prompt += f"用户提供了以下建模目的与特殊需求:{st.session_state.additional_preference}”。\n\n"
|
| 269 |
+
|
| 270 |
raw = self.call(prompt)
|
| 271 |
|
| 272 |
return raw
|
|
|
|
| 296 |
{result_json}
|
| 297 |
""".strip()
|
| 298 |
|
| 299 |
+
if st.session_state.preference_select:
|
| 300 |
+
prompt += f"以下是用户的分析偏好设置:{st.session_state.preference_select}”。\n\n"
|
| 301 |
+
if st.session_state.additional_preference:
|
| 302 |
+
prompt += f"用户提供了以下建模目的与特殊需求:{st.session_state.additional_preference}”。\n\n"
|
| 303 |
+
|
| 304 |
raw = self.call(prompt)
|
| 305 |
|
| 306 |
return raw
|
|
|
|
| 388 |
3. 保持语言专业、简洁,不输出代码。
|
| 389 |
"""
|
| 390 |
|
| 391 |
+
if st.session_state.preference_select:
|
| 392 |
+
prompt += f"以下是用户的分析偏好设置:{st.session_state.preference_select}”。\n\n"
|
| 393 |
+
if st.session_state.additional_preference:
|
| 394 |
+
prompt += f"用户提供了以下建模目的与特殊需求:{st.session_state.additional_preference}”。\n\n"
|
| 395 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
raw = self.call(prompt)
|
| 397 |
return raw
|
| 398 |
|
prompt_engineer/sec5_call_llm.py
CHANGED
|
@@ -354,6 +354,10 @@ class ReportAgent(LLMClient):
|
|
| 354 |
{full_summary}
|
| 355 |
"""
|
| 356 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
|
| 358 |
toc_response = self.call(prompt)
|
| 359 |
return toc_response.strip()
|
|
@@ -408,6 +412,11 @@ class ReportAgent(LLMClient):
|
|
| 408 |
1. 若章节间存在嵌套关系,优先分配给最具体的子章节(如 3.1.2 比 3.1 更优)。
|
| 409 |
"""
|
| 410 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
toc_with_figs = self.call(prompt)
|
| 412 |
return toc_with_figs.strip()
|
| 413 |
|
|
@@ -560,57 +569,10 @@ class ReportAgent(LLMClient):
|
|
| 560 |
请严格在以上范围内撰写本章节正文。
|
| 561 |
"""
|
| 562 |
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
# 当前需要撰写的章节信息(完整四元组,依次为标题、层级、内容大纲、图编号列表):
|
| 568 |
-
# {t}
|
| 569 |
-
|
| 570 |
-
# 报告目录结构(包含所有章节的四元组信息):
|
| 571 |
-
# {toc}
|
| 572 |
-
|
| 573 |
-
# 可参考的分析内容如下:
|
| 574 |
-
# {selected_full_contents}
|
| 575 |
-
|
| 576 |
-
# 此前已生成的章节内容如下(用于保持整体风格一致,并避免内容重复):
|
| 577 |
-
# {history_content}
|
| 578 |
-
|
| 579 |
-
# 请根据章节标题、层级、内容大纲、参考信息和图编号列表,生成该章节的完整正文。
|
| 580 |
-
|
| 581 |
-
# 正文详细程度有三种模式:
|
| 582 |
-
# - 简要:只包含核心结论与关键点,语言精炼;
|
| 583 |
-
# - 标准:包含主要分析逻辑、步骤与结果;
|
| 584 |
-
# - 详细:展开完整分析、方法论、推理过程与补充说明。
|
| 585 |
-
# 用户当前选择的模式是:{self.outline_length}
|
| 586 |
-
|
| 587 |
-
# 写作要求:
|
| 588 |
-
# 1. **核心任务**:仅撰写当前章节 **“{t[0]}”** 的正文内容,不得涉及其他章节。
|
| 589 |
-
# 2. **图表引用**:正文中引用的图表必须严格对应本章节的图编号(即 {t[3]}),**不得使用或编造其他编号**。
|
| 590 |
-
# 3. **语言规范**:
|
| 591 |
-
# - 语言应专业、准确、逻辑严谨;
|
| 592 |
-
# - 叙述风格应正式、学术化;
|
| 593 |
-
# - 禁止使用口语化或主观色彩表达。
|
| 594 |
-
# 4. **输出要求**:
|
| 595 |
-
# - 仅输出章节正文内容,不得输出 Markdown;
|
| 596 |
-
# - 不得输出任何标题,如:1,一,(1)等;
|
| 597 |
-
# - 禁止加粗、斜体、表情符号或其他符号修饰;
|
| 598 |
-
# - 不得出现非正文短语,如 “我认为”、“请继续”、“感谢阅读”、“---” 等;
|
| 599 |
-
# 5. **图片规范**:
|
| 600 |
-
# - 图片应独立成行,不得嵌入句子内部;
|
| 601 |
-
# - 图片可放置在段落的开头、结尾,或自然停顿处(如句号、分号后),以保持语义连贯;
|
| 602 |
-
# - 使用占位符格式 [FIG:index] 标记图片位置,其中 index 为对应图片的编号;
|
| 603 |
-
# - 在每个 [FIG:index] 占位符后,需紧跟一行图片标题,格式如下:
|
| 604 |
-
# 图:图片标题(简要说明图片内容及分析要点)
|
| 605 |
-
# - 图片插入位置应依据其语义和上下文逻辑确定:
|
| 606 |
-
# · 若图片用于引出分析,应放在段落开头;
|
| 607 |
-
# · 若用于支撑论述,应放在对应描述句之后;
|
| 608 |
-
# · 若总结结果或展示对比,应放在段落结尾;
|
| 609 |
-
# - 请务必确保图片位置与文字逻辑匹配,使图片与正文形成自然的论证衔接;
|
| 610 |
-
# - 请不要删除、合并或重排序图片编号,系统将在后续自动替换为真实图像。
|
| 611 |
-
|
| 612 |
-
# 请直接输出该章节的正文内容,不要有任何其他文字。
|
| 613 |
-
# """
|
| 614 |
|
| 615 |
content = self.call(prompt)
|
| 616 |
|
|
|
|
| 354 |
{full_summary}
|
| 355 |
"""
|
| 356 |
|
| 357 |
+
if st.session_state.preference_select:
|
| 358 |
+
prompt += f"以下是用户的分析偏好设置:{st.session_state.preference_select}”。\n\n"
|
| 359 |
+
if st.session_state.additional_preference:
|
| 360 |
+
prompt += f"用户提供了以下建模目的与特殊需求:{st.session_state.additional_preference}”。\n\n"
|
| 361 |
|
| 362 |
toc_response = self.call(prompt)
|
| 363 |
return toc_response.strip()
|
|
|
|
| 412 |
1. 若章节间存在嵌套关系,优先分配给最具体的子章节(如 3.1.2 比 3.1 更优)。
|
| 413 |
"""
|
| 414 |
|
| 415 |
+
if st.session_state.preference_select:
|
| 416 |
+
prompt += f"以下是用户的分析偏好设置:{st.session_state.preference_select}”。\n\n"
|
| 417 |
+
if st.session_state.additional_preference:
|
| 418 |
+
prompt += f"用户提供了以下建模目的与特殊需求:{st.session_state.additional_preference}”。\n\n"
|
| 419 |
+
|
| 420 |
toc_with_figs = self.call(prompt)
|
| 421 |
return toc_with_figs.strip()
|
| 422 |
|
|
|
|
| 569 |
请严格在以上范围内撰写本章节正文。
|
| 570 |
"""
|
| 571 |
|
| 572 |
+
if st.session_state.preference_select:
|
| 573 |
+
prompt += f"以下是用户的分析偏好设置:{st.session_state.preference_select}”。\n\n"
|
| 574 |
+
if st.session_state.additional_preference:
|
| 575 |
+
prompt += f"用户提供了以下建模目的与特殊需求:{st.session_state.additional_preference}”。\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 576 |
|
| 577 |
content = self.call(prompt)
|
| 578 |
|
workflow/dataloading/dataloading_render.py
CHANGED
|
@@ -184,7 +184,7 @@ if __name__ == "__main__":
|
|
| 184 |
auto = planner.loading_auto
|
| 185 |
|
| 186 |
if st.session_state.auto_mode == True:
|
| 187 |
-
if (agent.finish_auto_task == True and planner.switched_prep == False) or planner.
|
| 188 |
planner.finish_loading_auto()
|
| 189 |
st.switch_page("workflow/preprocessing/preprocessing_render.py")
|
| 190 |
|
|
|
|
| 184 |
auto = planner.loading_auto
|
| 185 |
|
| 186 |
if st.session_state.auto_mode == True:
|
| 187 |
+
if (agent.finish_auto_task == True and planner.switched_prep == False) or planner.loading_auto == False:
|
| 188 |
planner.finish_loading_auto()
|
| 189 |
st.switch_page("workflow/preprocessing/preprocessing_render.py")
|
| 190 |
|
workflow/preference/pref_render.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def preferences_select():
|
| 5 |
+
|
| 6 |
+
modeling_requirements = st.text_area(
|
| 7 |
+
"请描述你的数据分析目标与需求",
|
| 8 |
+
placeholder="例如:请帮我对数据进行可视化", height=200
|
| 9 |
+
)
|
| 10 |
+
st.session_state.additional_preference = modeling_requirements
|
| 11 |
+
|
| 12 |
+
col1, col2, col3 = st.columns(3)
|
| 13 |
+
|
| 14 |
+
with col1:
|
| 15 |
+
report_style = st.radio(
|
| 16 |
+
"1. 报告风格",
|
| 17 |
+
["简洁直观", "适中平衡", "深度技术型"],
|
| 18 |
+
index=1,
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
with col2:
|
| 22 |
+
analysis_type = st.radio(
|
| 23 |
+
"2. 分析方向偏好",
|
| 24 |
+
["商业分析", "学术分析", "工程/产品分析"],
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
with col3:
|
| 28 |
+
model_pref = st.radio(
|
| 29 |
+
"3. 模型偏好",
|
| 30 |
+
["可解释性强", "预测性能最优", "训练时间短"],
|
| 31 |
+
index=0,
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
col1, col2, col3 = st.columns(3)
|
| 35 |
+
|
| 36 |
+
with col1:
|
| 37 |
+
missing_pref = st.radio(
|
| 38 |
+
"4. 缺失值处理方式",
|
| 39 |
+
["简单填补", "频率填补", "高级填补(KNN/MICE)"],
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
with col2:
|
| 43 |
+
lang_style = st.radio(
|
| 44 |
+
"5. 报告语言风格",
|
| 45 |
+
["通俗易懂", "商业风", "学术论文风"],
|
| 46 |
+
)
|
| 47 |
+
|
| 48 |
+
with col3:
|
| 49 |
+
feature_pref = st.radio(
|
| 50 |
+
"6. 特征工程偏好",
|
| 51 |
+
["少量关键特征", "大量候选特征", "只做基础处理"],
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
preferences = None
|
| 55 |
+
if st.button("▶️ 保存偏好设置"):
|
| 56 |
+
preferences = {
|
| 57 |
+
"报告风格": report_style,
|
| 58 |
+
"模型偏好": model_pref,
|
| 59 |
+
"缺失值处理方式": missing_pref,
|
| 60 |
+
"特征工程偏好": feature_pref,
|
| 61 |
+
"报告语言风格": lang_style,
|
| 62 |
+
"分析方向偏好": analysis_type,
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
st.success("✅ 偏好设置已保存!")
|
| 66 |
+
st.session_state.preference_select = preferences
|
| 67 |
+
|
| 68 |
+
return preferences
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
if __name__ == "__main__":
|
| 72 |
+
|
| 73 |
+
st.title("偏好设置")
|
| 74 |
+
st.markdown("---")
|
| 75 |
+
|
| 76 |
+
c = st.columns(2)
|
| 77 |
+
with c[0].expander('偏好设置', True):
|
| 78 |
+
preferences_select()
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
|
workflow/preprocessing/preprocessing_core.py
CHANGED
|
@@ -52,7 +52,7 @@ def prep_meta_execution(agent, code, df, auto=False):
|
|
| 52 |
with st.spinner("正在运行程序..."):
|
| 53 |
exec(code, exec_ns)
|
| 54 |
except Exception as exc:
|
| 55 |
-
st.error(f"
|
| 56 |
st.text(traceback.format_exc())
|
| 57 |
agent.save_error(traceback.format_exc())
|
| 58 |
prep_code_gen(agent, debug=True)
|
|
|
|
| 52 |
with st.spinner("正在运行程序..."):
|
| 53 |
exec(code, exec_ns)
|
| 54 |
except Exception as exc:
|
| 55 |
+
st.error(f"已保存报错,正在重新调用llm生成代码debug")
|
| 56 |
st.text(traceback.format_exc())
|
| 57 |
agent.save_error(traceback.format_exc())
|
| 58 |
prep_code_gen(agent, debug=True)
|
workflow/report/report_utils.py
CHANGED
|
@@ -2,7 +2,7 @@ import re
|
|
| 2 |
import base64
|
| 3 |
|
| 4 |
import streamlit as st
|
| 5 |
-
|
| 6 |
|
| 7 |
|
| 8 |
def html_to_pdf_bytes_playwright(html: str) -> bytes:
|
|
|
|
| 2 |
import base64
|
| 3 |
|
| 4 |
import streamlit as st
|
| 5 |
+
from playwright.sync_api import sync_playwright
|
| 6 |
|
| 7 |
|
| 8 |
def html_to_pdf_bytes_playwright(html: str) -> bytes:
|