martinjosifoski committed on
Commit
6fffc74
1 Parent(s): 9a2680b

Add an abstract CodeTesting Flow and working CF_CodeTesting Flow.

Browse files
CF_Code.py CHANGED
@@ -4,6 +4,7 @@ from flows.flow_verse import load_class
4
  repository_id = os.environ.get("OpenAIChatAtomicFlow")
5
  OpenAIChatAtomicFlow = load_class(repository_id, "OpenAIChatAtomicFlow")
6
 
 
7
  class CF_Code(OpenAIChatAtomicFlow):
8
  def __init__(self, **kwargs):
9
  super().__init__(**kwargs)
 
4
  repository_id = os.environ.get("OpenAIChatAtomicFlow")
5
  OpenAIChatAtomicFlow = load_class(repository_id, "OpenAIChatAtomicFlow")
6
 
7
+
8
  class CF_Code(OpenAIChatAtomicFlow):
9
  def __init__(self, **kwargs):
10
  super().__init__(**kwargs)
CF_Code.yaml CHANGED
@@ -73,13 +73,12 @@ input_keys:
73
 
74
  output_keys:
75
  - "code"
76
-
77
- response_annotators:
78
- code_extractor:
79
- _target_: flows.message_annotators.RegexFirstOccurrenceExtractor
80
  regex: '(?<=```python)([\s\S]*?)(?=```)'
81
  regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
82
- key: "code"
 
83
  strip: True
84
  assert_unique: True
85
  verbose: True
 
73
 
74
  output_keys:
75
  - "code"
76
+ output_data_transformations:
77
+ - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
 
 
78
  regex: '(?<=```python)([\s\S]*?)(?=```)'
79
  regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
80
+ input_key: "raw_response"
81
+ output_key: "code"
82
  strip: True
83
  assert_unique: True
84
  verbose: True
CF_CodeCollab.yaml CHANGED
@@ -27,7 +27,7 @@ subflows_config:
27
  _target_: langchain.PromptTemplate
28
  template: |2-
29
  # Feedback on the last proposed solution
30
- {{query}}
31
 
32
 
33
  Consider the original problem statement, the last proposed solution and the provided feedback. Does the solution need to be updated? If so, provide the corrected version of the code in the following format:
@@ -37,15 +37,23 @@ subflows_config:
37
  otherwise, reply:
38
  "Final answer."
39
  input_variables:
40
- - query
41
  partial_variables:
42
  code_placeholder: "{{python_code}}"
43
  template_format: jinja2
44
- response_annotators:
45
- end_of_interaction_annotator:
46
- _target_: flows.message_annotators.EndOfInteraction
47
- end_of_interaction_message: "Final answer"
48
- key: "end_of_interaction"
 
 
 
 
 
 
 
 
49
  verbose: True
50
  output_keys:
51
  - "code"
@@ -55,7 +63,7 @@ subflows_config:
55
  class_name: CF_CodeCritic
56
  overrides:
57
  name: CodeCritic
58
- outputs_transformations:
59
- - _target_: flows.outputs_transformations.Rename
60
  old_key2new_key:
61
- raw_response: "query"
 
27
  _target_: langchain.PromptTemplate
28
  template: |2-
29
  # Feedback on the last proposed solution
30
+ {{code_feedback}}
31
 
32
 
33
  Consider the original problem statement, the last proposed solution and the provided feedback. Does the solution need to be updated? If so, provide the corrected version of the code in the following format:
 
37
  otherwise, reply:
38
  "Final answer."
39
  input_variables:
40
+ - code_feedback
41
  partial_variables:
42
  code_placeholder: "{{python_code}}"
43
  template_format: jinja2
44
+ default_human_input_key: "code_feedback"
45
+ output_data_transformations:
46
+ - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
47
+ regex: '(?<=```python)([\s\S]*?)(?=```)'
48
+ regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
49
+ input_key: "raw_response"
50
+ output_key: "code"
51
+ strip: True
52
+ assert_unique: True
53
+ verbose: True
54
+ - _target_: flows.data_transformations.EndOfInteraction
55
+ end_of_interaction_string: "Final answer"
56
+ output_key: "end_of_interaction"
57
  verbose: True
58
  output_keys:
59
  - "code"
 
63
  class_name: CF_CodeCritic
64
  overrides:
65
  name: CodeCritic
66
+ output_data_transformations:
67
+ - _target_: flows.data_transformations.KeyRename
68
  old_key2new_key:
69
+ raw_response: "code_feedback"
CF_CodeCritic.yaml CHANGED
@@ -73,3 +73,4 @@ input_keys:
73
  - "output_description"
74
  - "io_examples_and_explanation"
75
  - "code"
 
 
73
  - "output_description"
74
  - "io_examples_and_explanation"
75
  - "code"
76
+ output_keys: []
CF_CodeDebug.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from flows.base_flows import GeneratorCriticFlow
2
+
3
+
4
class CF_CodeDebug(GeneratorCriticFlow):
    """Generator-critic flow used for the code-debugging setup.

    The class adds no behaviour of its own: everything is inherited from
    ``GeneratorCriticFlow``. It exists so that the flow can be referred to
    by its own class name (the package ``__init__`` exports it under the
    ``cf-code_debug`` group).
    """

    def __init__(self, **kwargs):
        # Forward every keyword argument untouched to the base flow.
        super().__init__(**kwargs)
CF_CodeDebug.yaml ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "CodeDebug_Flow"
2
+ verbose: True
3
+ description: "ToDO: add description"
4
+
5
+ reset_generator_every_round: False
6
+ reset_critic_every_round: True
7
+ max_rounds: 2 # ToDo: To increase to 4
8
+ early_exit_key: "end_of_interaction"
9
+
10
+ input_keys:
11
+ - "problem_description"
12
+ - "input_description"
13
+ - "output_description"
14
+ - "io_examples_and_explanation"
15
+ - "public_tests_individual_io"
16
+ output_keys:
17
+ - "code"
18
+
19
+ subflows_config:
20
+ - _target_: flows.flow_verse.instantiate_flow
21
+ repository_id: ${oc.env:CC_FLOWS}
22
+ class_name: CF_Code
23
+ overrides:
24
+ name: "CodeGenerator"
25
+ model_name: "gpt-4"
26
+ human_message_prompt_template:
27
+ template: |2-
28
+ {{query}}
29
+
30
+
31
+ Consider the problem statement, the last proposed solution, and its issue. Provide a corrected version of the code that solves the original problem and resolves the issue, without any explanation, in the following format:
32
+ ```python
33
+ {{code_placeholder}}
34
+ ```
35
+ input_variables:
36
+ - query
37
+ partial_variables:
38
+ code_placeholder: "{{python_code}}"
39
+ input_data_transformations:
40
+ - _target_: flows.data_transformations.KeyRename
41
+ old_key2new_key:
42
+ "test_results_summary": "query"
43
+ output_data_transformations:
44
+ - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
45
+ regex: '(?<=```python)([\s\S]*?)(?=```)'
46
+ regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
47
+ input_key: "raw_response"
48
+ output_key: "code"
49
+ strip: True
50
+ assert_unique: True
51
+ verbose: True
52
+ - _target_: flows.data_transformations.EndOfInteraction
53
+ end_of_interaction_string: "Final answer"
54
+ output_key: "end_of_interaction"
55
+ verbose: True
56
+ output_keys:
57
+ - "code"
58
+ - "end_of_interaction"
59
+
60
+ - _target_: flows.flow_verse.instantiate_flow
61
+ repository_id: ${oc.env:CC_FLOWS}
62
+ class_name: CF_CodeTesting
63
+ overrides:
64
+ name: "CodeTestingCritic"
CF_CodeReflect.yaml CHANGED
@@ -12,7 +12,6 @@ input_keys:
12
  - "input_description"
13
  - "output_description"
14
  - "io_examples_and_explanation"
15
-
16
  output_keys:
17
  - "code"
18
 
@@ -23,29 +22,27 @@ subflows_config:
23
  overrides:
24
  name: "CodeGenerator"
25
  model_name: "gpt-4"
26
- response_annotators:
27
- end_of_interaction_annotator:
28
- _target_: flows.message_annotators.EndOfInteraction
29
- end_of_interaction_message: "Final answer"
30
- key: "end_of_interaction"
 
 
 
 
 
 
 
 
 
 
 
31
  verbose: True
32
  output_keys:
33
  - "code"
34
  - "end_of_interaction"
35
- - _target_: flows.base_flows.FixedReplyAtomicFlow.instantiate_with_overrides
36
- overrides:
37
- name: "CodeFixedReplyCritic"
38
- description: "ToDo: Add description"
39
- input_keys: []
40
- outputs_transformations:
41
- - _target_: flows.outputs_transformations.Rename
42
- old_key2new_key:
43
- raw_response: "query"
44
- fixed_reply: |2-
45
- Consider the problem statement and the last proposed solution. Are you sure that the solution is provided in the requested format, and crucially, solves the problem?
46
- If that is not the case, provide the corrected version of the code in the following format:
47
- ```python
48
- {{python_code}}
49
- ```
50
- otherwise, reply:
51
- "Final answer."
 
12
  - "input_description"
13
  - "output_description"
14
  - "io_examples_and_explanation"
 
15
  output_keys:
16
  - "code"
17
 
 
22
  overrides:
23
  name: "CodeGenerator"
24
  model_name: "gpt-4"
25
+ input_data_transformations:
26
+ - _target_: flows.data_transformations.KeyRename
27
+ old_key2new_key:
28
+ "code_reflect_message": "query"
29
+ output_data_transformations:
30
+ - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
31
+ regex: '(?<=```python)([\s\S]*?)(?=```)'
32
+ regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
33
+ input_key: "raw_response"
34
+ output_key: "code"
35
+ strip: True
36
+ assert_unique: True
37
+ verbose: True
38
+ - _target_: flows.data_transformations.EndOfInteraction
39
+ end_of_interaction_string: "Final answer"
40
+ output_key: "end_of_interaction"
41
  verbose: True
42
  output_keys:
43
  - "code"
44
  - "end_of_interaction"
45
+
46
+ - _target_: flows.flow_verse.instantiate_flow
47
+ repository_id: ${oc.env:CC_FLOWS}
48
+ class_name: FixedReply_CodeReflect
 
 
 
 
 
 
 
 
 
 
 
 
 
CF_CodeTesting.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict
2
+
3
+ from flows import utils
4
+ from .src.evaluation import testing_utils_codeforces
5
+ from .CodeTesting import CodeTesting
6
+
7
+ log = utils.get_pylogger(__name__)
8
+
9
+ # ToDo: Add a flag to control whether hidden, public, or both sets of tests should be used for evaluation
10
+
11
+
12
class CF_CodeTesting(CodeTesting):
    """Code-testing flow that evaluates a solution on the public tests.

    The test data is read from the ``public_tests_individual_io`` entry of
    the input, and the candidate solution from the ``code`` entry. The
    evaluation itself is delegated to
    ``testing_utils_codeforces.evaluate_solution_for_problem``.
    """

    REQUIRED_KEYS_CONFIG = []
    REQUIRED_KEYS_KWARGS = []

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _get_test_data(self, input_data: Dict):
        """Return the input-output pairs used to test the implementation.

        Args:
            input_data: The flow input; must contain the key
                ``public_tests_individual_io``.

        Raises:
            KeyError: If ``public_tests_individual_io`` is missing.
        """
        return input_data["public_tests_individual_io"]

    def _run_tests(self, input_data: Dict, test_data: Dict) -> Dict[str, Any]:
        """Evaluate ``input_data["code"]`` on ``test_data``.

        Args:
            input_data: The flow input; must contain the key ``code``.
            test_data: The value returned by ``_get_test_data``.

        Returns:
            The dictionary produced by the evaluation helper, with the
            ``input`` of every entry in ``public_tests_results`` collapsed
            into a single newline-separated string.
        """
        testing_results = testing_utils_codeforces.evaluate_solution_for_problem(
            candidate_solution=input_data["code"],
            public_tests_io=test_data,
        )

        for test_output in testing_results["public_tests_results"]:
            test_input = test_output["input"]
            # Joining a plain string would insert a newline between every
            # character, so only collections of lines are joined.
            if not isinstance(test_input, str):
                test_output["input"] = "\n".join(test_input)

        return testing_results
CF_CodeTesting.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ description: "ToDo: add description"
2
+ input_keys:
3
+ - "code"
4
+ - "public_tests_individual_io"
5
+ output_keys:
6
+ - "all_tests_passed"
7
+ - "tests_results_summary"
8
+ output_data_transformations:
9
+ - _target_: CC_flows.src.data_transformations.CorrectnessFlag # ToDo: This import style would not work if the flow is synced in the current implementation (the outer directory is a hash and not the name; figure out how to do the import robustly; e.g., using relative imports)
10
+ input_key: "public_tests_results" # ToDo: Add support for nested keys and update this to raw_response.public_tests_results?
11
+ output_key: "all_tests_passed"
12
+ - _target_: CC_flows.src.data_transformations.TestResultsSummaryGeneration
13
+ output_key: "tests_results_summary"
14
+
15
+ single_test_error_message: True
16
+
17
+ no_error_template: |2-
18
+ ${.issue_title}
19
+ All of the executed tests passed.
20
+
21
+ compilation_error_template: |2-
22
+ ${.issue_title}
23
+ The execution resulted in a compilation error.
24
+ ## Compilation error message:
25
+ {{error_message}}
26
+ timeout_error_template: |2-
27
+ ${.issue_title}
28
+ The execution timed out, the solution is not efficient enough.
29
+ runtime_error_template: |2-
30
+ ${.issue_title}
31
+ The execution resulted in a runtime error on the following test.
32
+ ## [Failed test] Input
33
+ ```
34
+ {{test_input}}
35
+ ```
36
+ ## [Failed test] Runtime error message
37
+ {{error_message}}
38
+ single_test_error_template: |2-
39
+ ${.issue_title}
40
+ The Python code does not solve the problem in the problem description due to logical errors. It fails the following test:
41
+ ## [Failed test] Input
42
+ ```
43
+ {{test_input}}
44
+ ```
45
+ ## [Failed test] Expected output
46
+ ```
47
+ {{expected_output}}
48
+ ```
49
+ ## [Failed test] Generated output
50
+ ```
51
+ {{generated_output}}
52
+ ```
53
+ all_tests_header: |2-
54
+ ${.issue_title}
55
+ The Python code does not solve the problem in the problem description due to logical errors. It fails on the following tests.
56
+ test_error_template: |2-
57
+ ## [Failed test {{idx}}]
58
+ ### [Failed test {{idx}}] Input
59
+ ```
60
+ {{test_input}}
61
+ ```
62
+ ### [Failed test {{idx}}] Expected output
63
+ ```
64
+ {{expected_output}}
65
+ ```
66
+ ### [Failed test {{idx}}] Generated output
67
+ ```
68
+ {{generated_output}}
69
+ ```
70
+ tests_separator: "\n\n"
71
+
72
+ issue_title: "# Issue with the last proposed solution"
73
+
74
+ feedback_title: "# Feedback on the last proposed solution"
75
+
76
+ no_code_template: |2-
77
+ ${.feedback_title}
78
+ The code was not provided in the correct output format specified in the request or it was not provided at all.
79
+ feedback_only_template: |2-
80
+ ${.feedback_title}
81
+ {{feedback_content}}
82
+ feedback_and_issue_template: |2-
83
+ {{issue_description}}
84
+
85
+ {{feedback_content}}
CF_CodeWithPlan.yaml CHANGED
@@ -81,12 +81,11 @@ input_keys:
81
  output_keys:
82
  - "code"
83
 
84
- response_annotators:
85
- code_extractor:
86
- _target_: flows.message_annotators.RegexFirstOccurrenceExtractor
87
  regex: '(?<=```python)([\s\S]*?)(?=```)'
88
  regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
89
- key: "code"
90
  strip: True
91
  assert_unique: True
92
  verbose: True
 
81
  output_keys:
82
  - "code"
83
 
84
+ output_data_transformations:
85
+ - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
 
86
  regex: '(?<=```python)([\s\S]*?)(?=```)'
87
  regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
88
+ output_key: "code"
89
  strip: True
90
  assert_unique: True
91
  verbose: True
CF_Plan.yaml CHANGED
@@ -75,13 +75,12 @@ input_keys:
75
  output_keys:
76
  - "plan"
77
 
78
- response_annotators:
79
- plan_extractor:
80
- _target_: flows.message_annotators.RegexFirstOccurrenceExtractor
81
  regex: '(?<=Conceptual solution)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
82
  regex_fallback:
83
  - '(?<=Conceptual solution:)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
84
- key: "plan"
85
  strip: True
86
  assert_unique: True
87
  verbose: True
 
75
  output_keys:
76
  - "plan"
77
 
78
+ output_data_transformations:
79
+ - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
 
80
  regex: '(?<=Conceptual solution)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
81
  regex_fallback:
82
  - '(?<=Conceptual solution:)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
83
+ output_key: "plan"
84
  strip: True
85
  assert_unique: True
86
  verbose: True
CF_PlanCollab.yaml CHANGED
@@ -27,7 +27,7 @@ subflows_config:
27
  _target_: langchain.PromptTemplate
28
  template: |2-
29
  # Feedback on the last proposed conceptual solution
30
- {{query}}
31
 
32
 
33
  Consider the original problem statement, the last proposed solution and the provided feedback. Does the solution need to be updated? If so, provide the corrected version of the conceptual solution in the following format:
@@ -36,15 +36,23 @@ subflows_config:
36
  otherwise, reply:
37
  "Final answer."
38
  input_variables:
39
- - query
40
  partial_variables:
41
  plan_placeholder: "{{conceptual_solution}}"
42
  template_format: jinja2
43
- response_annotators:
44
- end_of_interaction_annotator:
45
- _target_: flows.message_annotators.EndOfInteraction
46
- end_of_interaction_message: "Final answer"
47
- key: "end_of_interaction"
 
 
 
 
 
 
 
 
48
  verbose: True
49
  output_keys:
50
  - "plan"
@@ -54,7 +62,7 @@ subflows_config:
54
  class_name: CF_PlanCritic
55
  overrides:
56
  name: PlanCritic
57
- outputs_transformations:
58
- - _target_: flows.outputs_transformations.Rename
59
  old_key2new_key:
60
- raw_response: "query"
 
27
  _target_: langchain.PromptTemplate
28
  template: |2-
29
  # Feedback on the last proposed conceptual solution
30
+ {{plan_feedback}}
31
 
32
 
33
  Consider the original problem statement, the last proposed solution and the provided feedback. Does the solution need to be updated? If so, provide the corrected version of the conceptual solution in the following format:
 
36
  otherwise, reply:
37
  "Final answer."
38
  input_variables:
39
+ - plan_feedback
40
  partial_variables:
41
  plan_placeholder: "{{conceptual_solution}}"
42
  template_format: jinja2
43
+ default_human_input_key: "plan_feedback"
44
+ output_data_transformations:
45
+ - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
46
+ regex: '(?<=Conceptual solution)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
47
+ regex_fallback:
48
+ - '(?<=Conceptual solution:)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
49
+ output_key: "plan"
50
+ strip: True
51
+ assert_unique: True
52
+ verbose: True
53
+ - _target_: flows.data_transformations.EndOfInteraction
54
+ end_of_interaction_string: "Final answer"
55
+ output_key: "end_of_interaction"
56
  verbose: True
57
  output_keys:
58
  - "plan"
 
62
  class_name: CF_PlanCritic
63
  overrides:
64
  name: PlanCritic
65
+ output_data_transformations:
66
+ - _target_: flows.data_transformations.KeyRename
67
  old_key2new_key:
68
+ raw_response: "plan_feedback"
CF_PlanCritic.yaml CHANGED
@@ -74,4 +74,4 @@ input_keys:
74
  - "plan"
75
 
76
  output_keys:
77
- - "query"
 
74
  - "plan"
75
 
76
  output_keys:
77
+ - "plan_feedback"
CF_PlanReflect.yaml CHANGED
@@ -26,21 +26,22 @@ subflows_config:
26
  response_annotators:
27
  end_of_interaction_annotator:
28
  _target_: flows.message_annotators.EndOfInteraction
29
- end_of_interaction_message: "Final answer"
30
  key: "end_of_interaction"
31
  verbose: True
32
  output_keys:
33
  - "plan"
34
  - "end_of_interaction"
35
- - _target_: flows.base_flows.FixedReplyAtomicFlow.instantiate_with_overrides
 
 
36
  overrides:
37
  name: "PlanFixedReplyCritic"
38
  description: "ToDo: Add description"
39
- input_keys: []
40
- outputs_transformations:
41
- - _target_: flows.outputs_transformations.Rename
42
- old_key2new_key:
43
- raw_response: "query"
44
  fixed_reply: |2-
45
  Consider the problem statement and the last proposed solution. Are you sure that the solution is provided in the requested format, and crucially, solves the problem?
46
  If that is not the case, provide the corrected version of the conceptual solution in the following format:
 
26
  response_annotators:
27
  end_of_interaction_annotator:
28
  _target_: flows.message_annotators.EndOfInteraction
29
+ end_of_interaction_string: "Final answer"
30
  key: "end_of_interaction"
31
  verbose: True
32
  output_keys:
33
  - "plan"
34
  - "end_of_interaction"
35
+ - _target_: flows.flow_verse.instantiate_flow
36
+ repository_id: ${oc.env:CC_FLOWS}
37
+ class_name: CF_Reflect
38
  overrides:
39
  name: "PlanFixedReplyCritic"
40
  description: "ToDo: Add description"
41
+ input_keys:
42
+ - "plan"
43
+ output_keys:
44
+ - "query"
 
45
  fixed_reply: |2-
46
  Consider the problem statement and the last proposed solution. Are you sure that the solution is provided in the requested format, and crucially, solves the problem?
47
  If that is not the case, provide the corrected version of the conceptual solution in the following format:
CodeTesting.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from copy import deepcopy
2
+ from typing import Optional, Any, List, Dict
3
+
4
+ from flows import utils
5
+ from flows.base_flows.abstract import AtomicFlow
6
+
7
+ log = utils.get_pylogger(__name__)
8
+
9
+
10
class CodeTesting(AtomicFlow):
    """Abstract atomic flow that tests a piece of code.

    ``run`` first obtains the test data through ``_get_test_data`` and then
    hands it, together with the flow input, to ``_run_tests``. Both hooks
    raise ``NotImplementedError`` here and must be provided by subclasses.
    """

    REQUIRED_KEYS_CONFIG = []
    REQUIRED_KEYS_KWARGS = []

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _get_test_data(self, input_data: Dict):
        """Retrieve (or generate) the input-output pairs used for testing.

        Raises:
            NotImplementedError: Always; subclasses must override.
        """
        raise NotImplementedError()

    def _run_tests(self, input_data: Dict, test_data: Dict) -> Dict[str, Any]:
        """Run the tests in ``test_data`` and return the results.

        Raises:
            NotImplementedError: Always; subclasses must override.
        """
        raise NotImplementedError()

    @classmethod
    def instantiate_from_config(cls, config):
        """Build a flow instance from ``config``.

        The flow receives a deep copy of ``config`` as ``flow_config``, plus
        the data transformations built from the
        ``input_data_transformations`` and ``output_data_transformations``
        entries.

        A missing entry is treated as an empty list instead of raising.
        NOTE(review): this assumes ``_set_up_data_transformations`` accepts
        an empty list -- confirm against the base class.
        """
        flow_config = deepcopy(config)

        kwargs = {"flow_config": flow_config}
        for key in ("input_data_transformations", "output_data_transformations"):
            kwargs[key] = cls._set_up_data_transformations(config.get(key, []))

        # ~~~ Instantiate flow ~~~
        return cls(**kwargs)

    def run(self,
            input_data: Dict[str, Any],
            private_keys: Optional[List[str]] = None,
            keys_to_ignore_for_hash: Optional[List[str]] = None) -> Dict[str, Any]:
        """Fetch the test data and run the tests on ``input_data``.

        Args:
            input_data: The flow input, forwarded to both hooks.
            private_keys: Not used by this implementation.
            keys_to_ignore_for_hash: Not used by this implementation.

        Returns:
            Whatever ``_run_tests`` returns.
        """
        # The defaults are ``None`` rather than ``[]`` so that no list object
        # is shared between calls.

        # ~~~ Retrieve the test data ~~~
        test_data = self._get_test_data(input_data)

        # ~~~ Run tests ~~~
        response = self._run_tests(input_data, test_data)

        return response
47
+
48
+ # from typing import Optional, Any, List, Dict
49
+ #
50
+ # from flows.base_flows.abstract import AtomicFlow
51
+ # from flows.utils.general_helpers import validate_parameters
52
+ #
53
+ # class CodeTester(AtomicFlow):
54
+ # REQUIRED_KEYS_CONFIG = []
55
+ # REQUIRED_KEYS_KWARGS = []
56
+ #
57
+ # def __init__(self, **kwargs):
58
+ # super().__init__(**kwargs)
59
+ #
60
+ # @classmethod
61
+ # def _validate_parameters(cls, kwargs):
62
+ # validate_parameters(cls, kwargs) # is this necessary?
63
+ #
64
+ # def run(self,
65
+ # input_data: Dict[str, Any],
66
+ # private_keys: Optional[List[str]] = [],
67
+ # keys_to_ignore_for_hash: Optional[List[str]] = []) -> Dict[str, Any]:
68
+
FixedReply_CodeReflect.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from flows.base_flows import FixedReplyAtomicFlow
2
+
3
+
4
class FixedReply_CodeReflect(FixedReplyAtomicFlow):
    """Fixed-reply flow used as the critic in the code-reflect setup.

    The class adds no behaviour of its own: everything is inherited from
    ``FixedReplyAtomicFlow``. It exists so that the flow can be referred to
    by its own class name (the package ``__init__`` exports it under the
    ``cf-code_reflect`` group).
    """

    def __init__(self, **kwargs):
        # Forward every keyword argument untouched to the base flow.
        super().__init__(**kwargs)
FixedReply_CodeReflect.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "CodeReflectCritic"
2
+ description: "A flow that prompts the user to reflect on their code and provide a corrected version if necessary."
3
+ verbose: True
4
+
5
+ input_keys:
6
+ - "code"
7
+
8
+ output_keys:
9
+ - "code_reflect_message"
10
+ output_data_transformations:
11
+ - _target_: flows.data_transformations.KeyRename
12
+ old_key2new_key:
13
+ raw_response: "code_reflect_message"
14
+
15
+ fixed_reply: |2-
16
+ Consider the problem statement and the last proposed solution. Are you sure that the solution is provided in the requested format, and crucially, solves the problem?
17
+ If that is not the case, provide the corrected version of the code in the following format:
18
+ ```python
19
+ {{python_code}}
20
+ ```
21
+ otherwise, reply:
22
+ "Final answer."
23
+
24
+
25
+
__init__.py CHANGED
@@ -1,16 +1,31 @@
 
1
  from .CF_Code import CF_Code
 
 
 
2
  from .CF_CodeReflect import CF_CodeReflect
3
 
 
4
  from .CF_CodeCritic import CF_CodeCritic
5
  from .CF_CodeCollab import CF_CodeCollab
6
 
 
7
  from .CF_Plan import CF_Plan
8
  from .CF_CodeWithPlan import CF_CodeWithPlan
9
  from .CF_Plan_Code import CF_Plan_Code
10
 
 
11
  from .CF_PlanReflect import CF_PlanReflect
12
  from .CF_PlanReflect_Code import CF_PlanReflect_Code
13
 
 
14
  from .CF_PlanCritic import CF_PlanCritic
15
  from .CF_PlanCollab import CF_PlanCollab
16
  from .CF_PlanCollab_Code import CF_PlanCollab_Code
 
 
 
 
 
 
 
 
1
+ # cf-code
2
  from .CF_Code import CF_Code
3
+
4
+ # cf-code_reflect
5
+ from .FixedReply_CodeReflect import FixedReply_CodeReflect
6
  from .CF_CodeReflect import CF_CodeReflect
7
 
8
+ # cf-code_collab
9
  from .CF_CodeCritic import CF_CodeCritic
10
  from .CF_CodeCollab import CF_CodeCollab
11
 
12
+ # cf-plan-code (and cf-plan_oracle-code)
13
  from .CF_Plan import CF_Plan
14
  from .CF_CodeWithPlan import CF_CodeWithPlan
15
  from .CF_Plan_Code import CF_Plan_Code
16
 
17
+ # cf-plan_reflect-code
18
  from .CF_PlanReflect import CF_PlanReflect
19
  from .CF_PlanReflect_Code import CF_PlanReflect_Code
20
 
21
+ # cf-plan_collab-code
22
  from .CF_PlanCritic import CF_PlanCritic
23
  from .CF_PlanCollab import CF_PlanCollab
24
  from .CF_PlanCollab_Code import CF_PlanCollab_Code
25
+
26
+ # cf-code_debug
27
+ from .CF_CodeTesting import CF_CodeTesting
28
+ from .CF_CodeDebug import CF_CodeDebug
29
+
30
+ # from .CF_Debug import CF_Debug
31
+ # from .CF_CodeTestDebug import CF_CodeTestDebug
src/data_transformations/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .correctness_flag import CorrectnessFlag
2
+ from .test_results_summary_generation import TestResultsSummaryGeneration
src/data_transformations/correctness_flag.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any
2
+
3
+ from flows.data_transformations.abstract import DataTransformation
4
+
5
+
6
class CorrectnessFlag(DataTransformation):
    """Data transformation that records whether every test passed.

    It reads the list of test results stored under ``input_key`` inside
    ``data_dict["raw_response"]`` and writes a boolean under ``output_key``.
    """

    def __init__(self, output_key, input_key):
        """
        Args:
            output_key: Key under which the boolean flag is written.
            input_key: Key inside ``raw_response`` holding the test results.
        """
        super().__init__(output_key)
        self.input_key = input_key

    def __call__(self, data_dict: Dict[str, Any], **kwargs) -> Dict[str, Any]:
        """Set ``data_dict[self.output_key]`` and return ``data_dict``.

        The flag is true only when every entry under ``input_key`` has a
        truthy ``status`` and the response reports neither a failed
        compilation nor a timeout.

        Raises:
            KeyError: If ``raw_response`` or ``input_key`` is missing.
        """
        test_data = data_dict["raw_response"]

        # ``all`` over an empty list is vacuously true, so a run that never
        # produced test results must not be reported as a success. Both
        # keys are optional: when absent they do not affect the outcome.
        # NOTE(review): assumes ``raw_response`` is a dict-like object with
        # ``.get`` -- confirm against the producing flow.
        compiled = test_data.get("compilation_status", True)
        timed_out = test_data.get("timeout_error", False)

        all_tests_passed = (
            bool(compiled)
            and not timed_out
            and all(test_result["status"] for test_result in test_data[self.input_key])
        )
        data_dict[self.output_key] = all_tests_passed
        return data_dict
src/data_transformations/test_results_summary_generation.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any
2
+
3
+ import jinja2
4
+
5
+ from flows.data_transformations.abstract import DataTransformation
6
+
7
+
8
class TestResultsSummaryGeneration(DataTransformation):
    """Data transformation that turns test results into a textual summary.

    The summary is chosen in this order: all tests passed, compilation
    error, timeout, runtime error, logical error (one failed test or all of
    them, depending on ``single_test_error_message``). The templates and
    switches are taken from the keyword arguments given at construction.

    The summary is written under the configured ``output_key``.
    NOTE(review): earlier revisions always wrote to the literal key
    ``"test_results_summary"`` regardless of ``output_key``; consumers that
    look up that literal key must be configured with a matching
    ``output_key``.
    """

    def __init__(self, output_key, **kwargs):
        """
        Args:
            output_key: Key under which the summary is written.
            **kwargs: Templates and options, stored as ``self.params``.
        """
        super().__init__(output_key)
        self.params = kwargs

    @staticmethod
    def _render(template, **variables):
        """Render the Jinja ``template`` string with ``variables``."""
        return (
            jinja2.Environment(loader=jinja2.BaseLoader())
            .from_string(template)
            .render(**variables)
        )

    def _logical_error_message(self, failed_tests):
        """Build the message for tests that ran but gave wrong output."""
        if self.params["single_test_error_message"]:
            # construct the error message for a single (the first) failed test
            first_failed_test = failed_tests[0]
            return self._render(
                self.params["single_test_error_template"],
                test_input=first_failed_test["input"],
                expected_output=first_failed_test["expected_output"],
                generated_output=first_failed_test["generated_output"],
            )

        # construct the error message covering all failed tests
        parts = [self.params["all_tests_header"]]
        for idx, test_result in enumerate(failed_tests, start=1):
            parts.append(
                self._render(
                    self.params["test_error_template"],
                    idx=idx,
                    test_input=test_result["input"],
                    expected_output=test_result["expected_output"],
                    generated_output=test_result["generated_output"],
                )
            )
        return self.params["tests_separator"].join(parts)

    def __call__(self, data_dict: Dict[str, Any], **kwargs) -> Dict[str, Any]:
        """Write the summary into ``data_dict`` and return ``data_dict``.

        Reads ``data_dict["all_tests_passed"]`` and, when it is falsy,
        ``data_dict["raw_response"]``.

        Raises:
            KeyError: If a required entry or template is missing.
        """
        if data_dict["all_tests_passed"]:
            # the execution did not result in any errors
            data_dict[self.output_key] = self.params["no_error_template"]
            return data_dict

        test_data = data_dict["raw_response"]

        if not test_data["compilation_status"]:
            # compilation error occurred
            message_content = self._render(
                self.params["compilation_error_template"],
                error_message=test_data["compilation_error_message"].strip(),
            )
        elif test_data["timeout_error"]:
            # timeout error occurred; the template is used verbatim
            message_content = self.params["timeout_error_template"]
        else:
            # code compiled successfully without timeouts

            # retrieve the failed tests
            failed_tests = [
                test_result
                for test_result in test_data["public_tests_results"]
                if not test_result["status"]
            ]

            # A failed test without generated output indicates a runtime
            # error; when several exist, the last one is reported.
            runtime_error_test = None
            for test_result in failed_tests:
                if test_result["generated_output"] is None:
                    runtime_error_test = test_result

            if runtime_error_test:
                # construct the error message for the runtime error
                message_content = self._render(
                    self.params["runtime_error_template"],
                    test_input=runtime_error_test["input"],
                    error_message=runtime_error_test["error_message"].strip(),
                )
            else:
                # construct the error message corresponding to a logical error
                message_content = self._logical_error_message(failed_tests)

        data_dict[self.output_key] = message_content
        return data_dict