Update src/promptings/DebateCoder.py
Browse files
- src/promptings/DebateCoder.py +12 -54
src/promptings/DebateCoder.py
CHANGED
|
@@ -16,7 +16,6 @@ try:
|
|
| 16 |
except ImportError:
|
| 17 |
print("Warning: Could not import Pangu model. Debugging might fail if Pangu is not defined.")
|
| 18 |
|
| 19 |
-
from models.Qwen import Qwen
|
| 20 |
from .Base import BaseStrategy
|
| 21 |
from models.Base import BaseModel
|
| 22 |
from results.Results import Results
|
|
@@ -57,7 +56,7 @@ class DebateCoder(BaseStrategy):
|
|
| 57 |
|
| 58 |
def log_response(self, response: str, stage: str, item: dict):
|
| 59 |
"""记录响应到日志文件"""
|
| 60 |
-
log_file = os.path.join(self.log_dir, f"
|
| 61 |
|
| 62 |
try:
|
| 63 |
with open(log_file, "a", encoding="utf-8") as f:
|
|
@@ -102,7 +101,6 @@ Your responsibility is to ensure:
|
|
| 102 |
"""生成初始计划(第一轮)"""
|
| 103 |
role_prompt = self.get_agent_role_prompt(role)
|
| 104 |
problem_description = self.data.get_prompt(item)
|
| 105 |
-
|
| 106 |
messages = [
|
| 107 |
{
|
| 108 |
"role": "system",
|
|
@@ -126,7 +124,7 @@ Provide a numbered list of steps to solve this problem."""
|
|
| 126 |
print(f"\n{'='*60}")
|
| 127 |
print(f"[Round 1] {role} - Initial Planning")
|
| 128 |
print(f"{'='*60}")
|
| 129 |
-
|
| 130 |
response, pr_tok, com_tok = self.gpt_chat(messages)
|
| 131 |
item['api_calls'] = item.get('api_calls', 0) + 1
|
| 132 |
|
|
@@ -435,7 +433,7 @@ Output ONLY a number between 0-100, nothing else."""
|
|
| 435 |
return code_str
|
| 436 |
|
| 437 |
|
| 438 |
-
def Reviewer_pangu1b(self,problem_prompt: str, code: str, test_log: str, task_id: str = "unknown") -> Tuple[str, int, int]:
|
| 439 |
"""
|
| 440 |
Reviewer 角色:分析代码失败原因并提供修复计划。
|
| 441 |
|
|
@@ -453,7 +451,7 @@ Output ONLY a number between 0-100, nothing else."""
|
|
| 453 |
reviewer_input = [
|
| 454 |
{
|
| 455 |
"role": "user",
|
| 456 |
-
"content": f"You are an expert programmer. The following code was generated to solve a problem but failed sample test cases.\n\n## Problem:\n{problem_prompt}\n\n## Generated Code:\n```\n{code}\n```\n\n## Test Report:\n{test_log}\n\nPlease analyze why the code failed and provide a specific plan to fix it. Do not generate the full code, just the analysis and fix plan."
|
| 457 |
}
|
| 458 |
]
|
| 459 |
|
|
@@ -477,54 +475,11 @@ Output ONLY a number between 0-100, nothing else."""
|
|
| 477 |
return "Code failed sample tests. Please check logic and edge cases.", 0, 0
|
| 478 |
|
| 479 |
|
| 480 |
-
def Reviewer_qwen32b(self,problem_prompt: str, code: str, test_log: str, task_id: str = "unknown") -> Tuple[str, int, int]:
|
| 481 |
-
"""
|
| 482 |
-
Reviewer 角色:分析代码失败原因并提供修复计划。
|
| 483 |
-
|
| 484 |
-
Args:
|
| 485 |
-
problem_prompt (str): 题目描述。
|
| 486 |
-
code (str):生成的代码。
|
| 487 |
-
test_log (str): 测试失败的日志报告。
|
| 488 |
-
task_id (str): 当前任务的ID,用于日志打印 (对应原代码中的 i)。
|
| 489 |
-
|
| 490 |
-
Returns:
|
| 491 |
-
Tuple[str, int, int]: 返回 (分析结果, prompt_token消耗, completion_token消耗)
|
| 492 |
-
"""
|
| 493 |
-
|
| 494 |
-
# 构造 Prompt
|
| 495 |
-
reviewer_input = [
|
| 496 |
-
{
|
| 497 |
-
"role": "user",
|
| 498 |
-
"content": f"You are an expert programmer. The following code was generated to solve a problem but failed sample test cases.\n\n## Problem:\n{problem_prompt}\n\n## Generated Code:\n```\n{code}\n```\n\n## Test Report:\n{test_log}\n\nPlease analyze why the code failed and provide a specific plan to fix it. Do not generate the full code, just the analysis and fix plan."
|
| 499 |
-
}
|
| 500 |
-
]
|
| 501 |
-
|
| 502 |
-
print(f"Input for Reviewer analysis: {task_id}")
|
| 503 |
-
|
| 504 |
-
try:
|
| 505 |
-
qwen_model = Qwen()
|
| 506 |
-
analysis, q_pr_tok, q_com_tok = qwen_model.prompt(reviewer_input)
|
| 507 |
-
|
| 508 |
-
print(f"Reviewer Analysis: {analysis}", flush=True)
|
| 509 |
-
return analysis, q_pr_tok, q_com_tok
|
| 510 |
-
|
| 511 |
-
except NameError:
|
| 512 |
-
print("Error: Qwen model class not found. Skipping detailed analysis.")
|
| 513 |
-
return "Code failed sample tests. Please check logic and edge cases.", 0, 0
|
| 514 |
-
|
| 515 |
-
except Exception as e:
|
| 516 |
-
print(f"Error during Reviewer analysis: {e}")
|
| 517 |
-
return "Code failed sample tests. Please check logic and edge cases.", 0, 0
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
def debugging(self, plan: list, code: str, item: dict, algorithm_prompt: str) -> str:
|
| 524 |
passed = False
|
| 525 |
planning, _, _ = plan
|
| 526 |
|
| 527 |
-
plan_code_response = f"## Planning: {planning}\n## Code:\n```\n{code}\n```"
|
| 528 |
|
| 529 |
if type(self.data) == APPSDataset or type(self.data) == CodeContestDataset or type(self.data) == XCodeDataset:
|
| 530 |
std_input_prompt = "## Note: Strictly follow the input and output format. The input should be taken from Standard input and output should be given to standard output. If you are writing a function then after the function definition take input using `input()` function then call the function with specified parameters and finally print the output of the function. Do not add extra print statement otherwise it will failed the test cases."
|
|
@@ -545,13 +500,14 @@ Output ONLY a number between 0-100, nothing else."""
|
|
| 545 |
problem_prompt = self.data.get_prompt(item)
|
| 546 |
|
| 547 |
# 调用 Reviewer
|
| 548 |
-
analysis, pr_cost, com_cost = self.
|
| 549 |
problem_prompt=problem_prompt,
|
|
|
|
| 550 |
code=code,
|
| 551 |
test_log=test_log,
|
| 552 |
task_id=i
|
| 553 |
)
|
| 554 |
-
|
| 555 |
# 更新 Token 计数
|
| 556 |
self.pr_tok += pr_cost
|
| 557 |
self.com_tok += com_cost
|
|
@@ -560,8 +516,7 @@ Output ONLY a number between 0-100, nothing else."""
|
|
| 560 |
input_for_improving_code = [
|
| 561 |
{
|
| 562 |
"role": "user",
|
| 563 |
-
"content": f"
|
| 564 |
-
}
|
| 565 |
]
|
| 566 |
|
| 567 |
print("\n\n________________________")
|
|
@@ -611,6 +566,9 @@ Output ONLY a number between 0-100, nothing else."""
|
|
| 611 |
|
| 612 |
def run_single_pass(self, item: dict):
|
| 613 |
"""执行单个问题的多智能体辩论流程"""
|
|
|
|
|
|
|
|
|
|
| 614 |
print("\n" + "="*80)
|
| 615 |
print(f"Processing: {item.get('task_id', item.get('name', 'unknown'))}")
|
| 616 |
print("="*80)
|
|
|
|
| 16 |
except ImportError:
|
| 17 |
print("Warning: Could not import Pangu model. Debugging might fail if Pangu is not defined.")
|
| 18 |
|
|
|
|
| 19 |
from .Base import BaseStrategy
|
| 20 |
from models.Base import BaseModel
|
| 21 |
from results.Results import Results
|
|
|
|
| 56 |
|
| 57 |
def log_response(self, response: str, stage: str, item: dict):
|
| 58 |
"""记录响应到日志文件"""
|
| 59 |
+
log_file = os.path.join(self.log_dir, f"MultiAgentDebate_{self.model.__class__.__name__}_responses.log")
|
| 60 |
|
| 61 |
try:
|
| 62 |
with open(log_file, "a", encoding="utf-8") as f:
|
|
|
|
| 101 |
"""生成初始计划(第一轮)"""
|
| 102 |
role_prompt = self.get_agent_role_prompt(role)
|
| 103 |
problem_description = self.data.get_prompt(item)
|
|
|
|
| 104 |
messages = [
|
| 105 |
{
|
| 106 |
"role": "system",
|
|
|
|
| 124 |
print(f"\n{'='*60}")
|
| 125 |
print(f"[Round 1] {role} - Initial Planning")
|
| 126 |
print(f"{'='*60}")
|
| 127 |
+
print("messages:", messages)
|
| 128 |
response, pr_tok, com_tok = self.gpt_chat(messages)
|
| 129 |
item['api_calls'] = item.get('api_calls', 0) + 1
|
| 130 |
|
|
|
|
| 433 |
return code_str
|
| 434 |
|
| 435 |
|
| 436 |
+
def Reviewer_pangu1b(self,problem_prompt: str, plan: str, code: str, test_log: str, task_id: str = "unknown") -> Tuple[str, int, int]:
|
| 437 |
"""
|
| 438 |
Reviewer 角色:分析代码失败原因并提供修复计划。
|
| 439 |
|
|
|
|
| 451 |
reviewer_input = [
|
| 452 |
{
|
| 453 |
"role": "user",
|
| 454 |
+
"content": f"You are an expert programmer. The following code was generated to solve a problem but failed sample test cases.\n\n## Problem:\n{problem_prompt}\n\n## Plan:\n{plan}\n \n## Generated Code:\n```\n{code}\n```\n\n## Test Report:\n{test_log}\n\nPlease analyze why the code failed and provide a specific plan to fix it. Do not generate the full code, just the analysis and fix plan."
|
| 455 |
}
|
| 456 |
]
|
| 457 |
|
|
|
|
| 475 |
return "Code failed sample tests. Please check logic and edge cases.", 0, 0
|
| 476 |
|
| 477 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
def debugging(self, plan: list, code: str, item: dict, algorithm_prompt: str) -> str:
|
| 479 |
passed = False
|
| 480 |
planning, _, _ = plan
|
| 481 |
|
| 482 |
+
# plan_code_response = f"## Planning: {planning}\n## Code:\n```\n{code}\n```"
|
| 483 |
|
| 484 |
if type(self.data) == APPSDataset or type(self.data) == CodeContestDataset or type(self.data) == XCodeDataset:
|
| 485 |
std_input_prompt = "## Note: Strictly follow the input and output format. The input should be taken from Standard input and output should be given to standard output. If you are writing a function then after the function definition take input using `input()` function then call the function with specified parameters and finally print the output of the function. Do not add extra print statement otherwise it will failed the test cases."
|
|
|
|
| 500 |
problem_prompt = self.data.get_prompt(item)
|
| 501 |
|
| 502 |
# 调用 Reviewer
|
| 503 |
+
analysis, pr_cost, com_cost = self.Reviewer_pangu1b(
|
| 504 |
problem_prompt=problem_prompt,
|
| 505 |
+
plan = planning,
|
| 506 |
code=code,
|
| 507 |
test_log=test_log,
|
| 508 |
task_id=i
|
| 509 |
)
|
| 510 |
+
print(f"pr_cost: {pr_cost}, com_cost: {com_cost}")
|
| 511 |
# 更新 Token 计数
|
| 512 |
self.pr_tok += pr_cost
|
| 513 |
self.com_tok += com_cost
|
|
|
|
| 516 |
input_for_improving_code = [
|
| 517 |
{
|
| 518 |
"role": "user",
|
| 519 |
+
"content": f" You are an expert competitive programmer. Your task is to fix the provided {self.language} code based on the Expert Analysis. The original code failed sample test cases.\n\n### Problem Description: \n{self.data.get_prompt(item)}\n ### Original Code (Buggy): \n```\n{code}\n```\n### Expert Analysis & Fix Plan:\n{analysis}\n\nImprove your code to solve the problem correctly based on this analysis.\n### Requirement: 1. Read the Expert Analysis carefully. 2. {std_input_prompt} 3. Generate the corrected code.\n\n----------------\nImportant:\n{std_input_prompt}\n## Your response must contain the modified planning and then the {self.language} code inside ``` block to solve this problem." }
|
|
|
|
| 520 |
]
|
| 521 |
|
| 522 |
print("\n\n________________________")
|
|
|
|
| 566 |
|
| 567 |
def run_single_pass(self, item: dict):
|
| 568 |
"""执行单个问题的多智能体辩论流程"""
|
| 569 |
+
self.pr_tok = 0
|
| 570 |
+
self.com_tok = 0
|
| 571 |
+
|
| 572 |
print("\n" + "="*80)
|
| 573 |
print(f"Processing: {item.get('task_id', item.get('name', 'unknown'))}")
|
| 574 |
print("="*80)
|