smallan13 committed on
Commit
c6aa710
·
verified ·
1 Parent(s): d651f89

Update src/promptings/DebateCoder.py

Browse files
Files changed (1) hide show
  1. src/promptings/DebateCoder.py +12 -54
src/promptings/DebateCoder.py CHANGED
@@ -16,7 +16,6 @@ try:
16
  except ImportError:
17
  print("Warning: Could not import Pangu model. Debugging might fail if Pangu is not defined.")
18
 
19
- from models.Qwen import Qwen
20
  from .Base import BaseStrategy
21
  from models.Base import BaseModel
22
  from results.Results import Results
@@ -57,7 +56,7 @@ class DebateCoder(BaseStrategy):
57
 
58
  def log_response(self, response: str, stage: str, item: dict):
59
  """记录响应到日志文件"""
60
- log_file = os.path.join(self.log_dir, f"DebateCoder_{self.model.__class__.__name__}_responses.log")
61
 
62
  try:
63
  with open(log_file, "a", encoding="utf-8") as f:
@@ -102,7 +101,6 @@ Your responsibility is to ensure:
102
  """生成初始计划(第一轮)"""
103
  role_prompt = self.get_agent_role_prompt(role)
104
  problem_description = self.data.get_prompt(item)
105
-
106
  messages = [
107
  {
108
  "role": "system",
@@ -126,7 +124,7 @@ Provide a numbered list of steps to solve this problem."""
126
  print(f"\n{'='*60}")
127
  print(f"[Round 1] {role} - Initial Planning")
128
  print(f"{'='*60}")
129
-
130
  response, pr_tok, com_tok = self.gpt_chat(messages)
131
  item['api_calls'] = item.get('api_calls', 0) + 1
132
 
@@ -435,7 +433,7 @@ Output ONLY a number between 0-100, nothing else."""
435
  return code_str
436
 
437
 
438
- def Reviewer_pangu1b(self,problem_prompt: str, code: str, test_log: str, task_id: str = "unknown") -> Tuple[str, int, int]:
439
  """
440
  Reviewer 角色:分析代码失败原因并提供修复计划。
441
 
@@ -453,7 +451,7 @@ Output ONLY a number between 0-100, nothing else."""
453
  reviewer_input = [
454
  {
455
  "role": "user",
456
- "content": f"You are an expert programmer. The following code was generated to solve a problem but failed sample test cases.\n\n## Problem:\n{problem_prompt}\n\n## Generated Code:\n```\n{code}\n```\n\n## Test Report:\n{test_log}\n\nPlease analyze why the code failed and provide a specific plan to fix it. Do not generate the full code, just the analysis and fix plan."
457
  }
458
  ]
459
 
@@ -477,54 +475,11 @@ Output ONLY a number between 0-100, nothing else."""
477
  return "Code failed sample tests. Please check logic and edge cases.", 0, 0
478
 
479
 
480
- def Reviewer_qwen32b(self,problem_prompt: str, code: str, test_log: str, task_id: str = "unknown") -> Tuple[str, int, int]:
481
- """
482
- Reviewer 角色:分析代码失败原因并提供修复计划。
483
-
484
- Args:
485
- problem_prompt (str): 题目描述。
486
- code (str):生成的代码。
487
- test_log (str): 测试失败的日志报告。
488
- task_id (str): 当前任务的ID,用于日志打印 (对应原代码中的 i)。
489
-
490
- Returns:
491
- Tuple[str, int, int]: 返回 (分析结果, prompt_token消耗, completion_token消耗)
492
- """
493
-
494
- # 构造 Prompt
495
- reviewer_input = [
496
- {
497
- "role": "user",
498
- "content": f"You are an expert programmer. The following code was generated to solve a problem but failed sample test cases.\n\n## Problem:\n{problem_prompt}\n\n## Generated Code:\n```\n{code}\n```\n\n## Test Report:\n{test_log}\n\nPlease analyze why the code failed and provide a specific plan to fix it. Do not generate the full code, just the analysis and fix plan."
499
- }
500
- ]
501
-
502
- print(f"Input for Reviewer analysis: {task_id}")
503
-
504
- try:
505
- qwen_model = Qwen()
506
- analysis, q_pr_tok, q_com_tok = qwen_model.prompt(reviewer_input)
507
-
508
- print(f"Reviewer Analysis: {analysis}", flush=True)
509
- return analysis, q_pr_tok, q_com_tok
510
-
511
- except NameError:
512
- print("Error: Qwen model class not found. Skipping detailed analysis.")
513
- return "Code failed sample tests. Please check logic and edge cases.", 0, 0
514
-
515
- except Exception as e:
516
- print(f"Error during Reviewer analysis: {e}")
517
- return "Code failed sample tests. Please check logic and edge cases.", 0, 0
518
-
519
-
520
-
521
-
522
-
523
  def debugging(self, plan: list, code: str, item: dict, algorithm_prompt: str) -> str:
524
  passed = False
525
  planning, _, _ = plan
526
 
527
- plan_code_response = f"## Planning: {planning}\n## Code:\n```\n{code}\n```"
528
 
529
  if type(self.data) == APPSDataset or type(self.data) == CodeContestDataset or type(self.data) == XCodeDataset:
530
  std_input_prompt = "## Note: Strictly follow the input and output format. The input should be taken from Standard input and output should be given to standard output. If you are writing a function then after the function definition take input using `input()` function then call the function with specified parameters and finally print the output of the function. Do not add extra print statement otherwise it will failed the test cases."
@@ -545,13 +500,14 @@ Output ONLY a number between 0-100, nothing else."""
545
  problem_prompt = self.data.get_prompt(item)
546
 
547
  # 调用 Reviewer
548
- analysis, pr_cost, com_cost = self.Reviewer_qwen32b(
549
  problem_prompt=problem_prompt,
 
550
  code=code,
551
  test_log=test_log,
552
  task_id=i
553
  )
554
-
555
  # 更新 Token 计数
556
  self.pr_tok += pr_cost
557
  self.com_tok += com_cost
@@ -560,8 +516,7 @@ Output ONLY a number between 0-100, nothing else."""
560
  input_for_improving_code = [
561
  {
562
  "role": "user",
563
- "content": f"Given a competitive programming problem you have generated {self.language} code to solve the problem. But the generated code can not pass sample test cases.\n\nHere is an analysis of the failure and a fix plan provided by an expert:\n{analysis}\n\nImprove your code to solve the problem correctly based on this analysis.\n{algorithm_prompt}\n## Problem to be solved:\n{self.data.get_prompt(item)}\n{plan_code_response}\n## Test Report:\n{test_log}\n## Modified Planning:\n## Let's think step by step to modify {self.language} Code for solving this problem.\n\n----------------\nImportant:\n{std_input_prompt}\n## Your response must contain the modified planning and then the {self.language} code inside ``` block to solve this problem."
564
- }
565
  ]
566
 
567
  print("\n\n________________________")
@@ -611,6 +566,9 @@ Output ONLY a number between 0-100, nothing else."""
611
 
612
  def run_single_pass(self, item: dict):
613
  """执行单个问题的多智能体辩论流程"""
 
 
 
614
  print("\n" + "="*80)
615
  print(f"Processing: {item.get('task_id', item.get('name', 'unknown'))}")
616
  print("="*80)
 
16
  except ImportError:
17
  print("Warning: Could not import Pangu model. Debugging might fail if Pangu is not defined.")
18
 
 
19
  from .Base import BaseStrategy
20
  from models.Base import BaseModel
21
  from results.Results import Results
 
56
 
57
  def log_response(self, response: str, stage: str, item: dict):
58
  """记录响应到日志文件"""
59
+ log_file = os.path.join(self.log_dir, f"MultiAgentDebate_{self.model.__class__.__name__}_responses.log")
60
 
61
  try:
62
  with open(log_file, "a", encoding="utf-8") as f:
 
101
  """生成初始计划(第一轮)"""
102
  role_prompt = self.get_agent_role_prompt(role)
103
  problem_description = self.data.get_prompt(item)
 
104
  messages = [
105
  {
106
  "role": "system",
 
124
  print(f"\n{'='*60}")
125
  print(f"[Round 1] {role} - Initial Planning")
126
  print(f"{'='*60}")
127
+ print("messages:", messages)
128
  response, pr_tok, com_tok = self.gpt_chat(messages)
129
  item['api_calls'] = item.get('api_calls', 0) + 1
130
 
 
433
  return code_str
434
 
435
 
436
+ def Reviewer_pangu1b(self,problem_prompt: str, plan: str, code: str, test_log: str, task_id: str = "unknown") -> Tuple[str, int, int]:
437
  """
438
  Reviewer 角色:分析代码失败原因并提供修复计划。
439
 
 
451
  reviewer_input = [
452
  {
453
  "role": "user",
454
+ "content": f"You are an expert programmer. The following code was generated to solve a problem but failed sample test cases.\n\n## Problem:\n{problem_prompt}\n\n## Plan:\n{plan}\n \n## Generated Code:\n```\n{code}\n```\n\n## Test Report:\n{test_log}\n\nPlease analyze why the code failed and provide a specific plan to fix it. Do not generate the full code, just the analysis and fix plan."
455
  }
456
  ]
457
 
 
475
  return "Code failed sample tests. Please check logic and edge cases.", 0, 0
476
 
477
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  def debugging(self, plan: list, code: str, item: dict, algorithm_prompt: str) -> str:
479
  passed = False
480
  planning, _, _ = plan
481
 
482
+ # plan_code_response = f"## Planning: {planning}\n## Code:\n```\n{code}\n```"
483
 
484
  if type(self.data) == APPSDataset or type(self.data) == CodeContestDataset or type(self.data) == XCodeDataset:
485
  std_input_prompt = "## Note: Strictly follow the input and output format. The input should be taken from Standard input and output should be given to standard output. If you are writing a function then after the function definition take input using `input()` function then call the function with specified parameters and finally print the output of the function. Do not add extra print statement otherwise it will failed the test cases."
 
500
  problem_prompt = self.data.get_prompt(item)
501
 
502
  # 调用 Reviewer
503
+ analysis, pr_cost, com_cost = self.Reviewer_pangu1b(
504
  problem_prompt=problem_prompt,
505
+ plan = planning,
506
  code=code,
507
  test_log=test_log,
508
  task_id=i
509
  )
510
+ print(f"pr_cost: {pr_cost}, com_cost: {com_cost}")
511
  # 更新 Token 计数
512
  self.pr_tok += pr_cost
513
  self.com_tok += com_cost
 
516
  input_for_improving_code = [
517
  {
518
  "role": "user",
519
+ "content": f" You are an expert competitive programmer. Your task is to fix the provided {self.language} code based on the Expert Analysis. The original code failed sample test cases.\n\n### Problem Description: \n{self.data.get_prompt(item)}\n ### Original Code (Buggy): \n```\n{code}\n```\n### Expert Analysis & Fix Plan:\n{analysis}\n\nImprove your code to solve the problem correctly based on this analysis.\n### Requirement: 1. Read the Expert Analysis carefully. 2. {std_input_prompt} 3. Generate the corrected code.\n\n----------------\nImportant:\n{std_input_prompt}\n## Your response must contain the modified planning and then the {self.language} code inside ``` block to solve this problem." }
 
520
  ]
521
 
522
  print("\n\n________________________")
 
566
 
567
  def run_single_pass(self, item: dict):
568
  """执行单个问题的多智能体辩论流程"""
569
+ self.pr_tok = 0
570
+ self.com_tok = 0
571
+
572
  print("\n" + "="*80)
573
  print(f"Processing: {item.get('task_id', item.get('name', 'unknown'))}")
574
  print("="*80)