add LC_Code #3
opened by yeeef
- LC_Code.py +0 -2
- LC_CodeCollab.py +6 -0
- LC_CodeCollab.yaml +71 -0
- LC_CodeCritic.py +7 -0
- LC_CodeCritic.yaml +77 -0
- LC_CodeCriticWrongAttempt.py +6 -0
- LC_CodeCriticWrongAttempt.yaml +89 -0
- LC_CodeCriticWrongAttemptWithPlan.py +6 -0
- LC_CodeCriticWrongAttemptWithPlan.yaml +97 -0
- LC_CodeDebug.py +12 -0
- LC_CodeDebug.yaml +64 -0
- LC_CodeDebugCollab.py +12 -0
- LC_CodeDebugCollab.yaml +68 -0
- LC_CodeDebugCollabWithPlan.py +12 -0
- LC_CodeDebugCollabWithPlan.yaml +68 -0
- LC_CodeDebugCritic.py +13 -0
- LC_CodeDebugCritic.yaml +31 -0
- LC_CodeDebugCriticWithPlan.py +13 -0
- LC_CodeDebugCriticWithPlan.yaml +34 -0
- LC_CodeReflect.py +6 -0
- LC_CodeReflect.yaml +49 -0
- LC_CodeTesting.py +33 -0
- LC_CodeTesting.yaml +90 -0
- LC_CodeWithPlan.py +6 -0
- LC_CodeWithPlan.yaml +92 -0
- LC_Plan.py +6 -0
- LC_Plan.yaml +80 -0
- LC_PlanCollab.py +6 -0
- LC_PlanCollab.yaml +69 -0
- LC_PlanCollab_Code.py +6 -0
- LC_PlanCollab_Code.yaml +22 -0
- LC_PlanCritic.py +6 -0
- LC_PlanCritic.yaml +77 -0
- LC_PlanReflect.py +6 -0
- LC_PlanReflect.yaml +63 -0
- LC_PlanReflect_Code.py +6 -0
- LC_PlanReflect_Code.yaml +15 -0
- LC_Plan_Code.py +6 -0
- LC_Plan_Code.yaml +26 -0
- __init__.py +43 -1
- src/evaluation/testing_utils_codeforces.py +1 -1
- src/evaluation/testing_utils_leetcode.py +258 -0
LC_Code.py
CHANGED
@@ -1,5 +1,3 @@
-import os
-
 from martinjosifoski.OpenAIChatAtomicFlow import OpenAIChatAtomicFlow
 
 
LC_CodeCollab.py
ADDED
@@ -0,0 +1,6 @@
from flows.base_flows import GeneratorCriticFlow


class LC_CodeCollab(GeneratorCriticFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
LC_CodeCollab.yaml
ADDED
@@ -0,0 +1,71 @@
name: "CodeCollab_Flow"
verbose: True
description: "ToDO: add description"

reset_generator_every_round: False
reset_critic_every_round: True
max_rounds: 2 # ToDO: To increase to 4
early_exit_key: "end_of_interaction"

input_data_transformations: []
input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"
  - "python_stub"

output_data_transformations:
  - _target_: flows.data_transformations.KeyRename
    old_key2new_key:
      raw_response.code: "code"
output_keys:
  - "code"

subflows_config:
  - _target_: martinjosifoski.CC_flows.LC_Code.instantiate_from_default_config
    overrides:
      name: "CodeGenerator"
      model_name: "gpt-4"
      human_message_prompt_template:
        _target_: langchain.PromptTemplate
        template: |2-
          # Feedback on the last proposed solution
          {{code_feedback}}


          Consider the original problem statement, the last proposed solution and the provided feedback. Does the solution need to be updated? If so, provide the corrected version of the code in the following format:
          ```python
          {{code_placeholder}}
          ```
          otherwise, reply:
          "Final answer."
        input_variables:
          - code_feedback
        partial_variables:
          code_placeholder: "{{python_code}}"
        template_format: jinja2
      default_human_input_keys:
        - "code_feedback"
      output_data_transformations:
        - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
          regex: '(?<=```python)([\s\S]*?)(?=```)'
          regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
          input_key: "raw_response"
          output_key: "code"
          strip: True
          assert_unique: True
          verbose: True
        - _target_: flows.data_transformations.EndOfInteraction
          end_of_interaction_string: "Final answer"
          output_key: "end_of_interaction"
          verbose: True
      output_keys:
        - "code"
        - "end_of_interaction"
  - _target_: martinjosifoski.CC_flows.LC_CodeCritic.instantiate_from_default_config
    overrides:
      name: CodeCritic
      output_data_transformations:
        - _target_: flows.data_transformations.KeyRename
          old_key2new_key:
            raw_response: "code_feedback"
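Most of the configs in this PR share the same two extraction patterns (the `regex` and `regex_fallback` fields of `RegexFirstOccurrenceExtractor`) to pull the generated code out of the model's raw reply. The following minimal, self-contained sketch shows what that extraction amounts to; the helper name and the example reply are illustrative and not part of the PR.

    import re

    # Same patterns as the `regex` / `regex_fallback` fields in the configs above.
    PRIMARY = r'(?<=```python)([\s\S]*?)(?=```)'
    FALLBACK = r'(?<=```)([\s\S]*?)(?=```)'

    def extract_first_code_block(raw_response):
        """Return the first fenced code block in a model reply, stripped, or None."""
        for pattern in (PRIMARY, FALLBACK):
            match = re.search(pattern, raw_response)
            if match:
                return match.group(1).strip()  # mirrors `strip: True`
        return None

    reply = "Here is the fix:\n```python\nprint('hello')\n```\nFinal answer."
    print(extract_first_code_block(reply))  # -> print('hello')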
LC_CodeCritic.py
ADDED
@@ -0,0 +1,7 @@
from martinjosifoski.OpenAIChatAtomicFlow import OpenAIChatAtomicFlow



class LC_CodeCritic(OpenAIChatAtomicFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
LC_CodeCritic.yaml
ADDED
@@ -0,0 +1,77 @@
name: "CodeCritic_Flow"
verbose: True
description: "ToDO: add description"

model_name: "gpt-4"
generation_parameters:
  n: 1
  max_tokens: 3000
  temperature: 0.3

  model_kwargs:
    top_p: 0.2
    frequency_penalty: 0
    presence_penalty: 0

system_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: |2-
    Your goal is to identify potential issues with a competitive programming solution attempt.

    The user will specify the problem by providing you with:
    - the problem statement
    - example test cases
    - the constraints of the problem
    - (optional) explanation of the test cases
    - a Python solution attempt

    Crucially, your goal is to correctly identify potential issues with the solution attempt, and not to provide the code implementation yourself.
    The user will provide you with a task and an output format that you will strictly follow.
  input_variables: []
  template_format: jinja2

human_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: "{{query}}"
  input_variables:
    - "query"
  template_format: jinja2

query_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: |2-
    # Problem statement
    {{problem_description}}

    {{io_description}}

    # Constraints
    {{constraints}}

    # code stub
    ```python
    {{python_stub}}
    ```

    # Python solution attempt
    ```python
    {{code}}
    ```


    Consider the problem statement and the solution attempt. Are there any issues with the proposed solution or it is correct? Explain your reasoning very concisely, and do not provide code.
  input_variables:
    - "problem_description"
    - "io_description"
    - "constraints"
    - "python_stub"
    - "code"
  template_format: jinja2

input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"
  - "python_stub"
  - "code"
output_keys: []
LC_CodeCriticWrongAttempt.py
ADDED
@@ -0,0 +1,6 @@
from martinjosifoski.OpenAIChatAtomicFlow import OpenAIChatAtomicFlow


class LC_CodeCriticWrongAttempt(OpenAIChatAtomicFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
LC_CodeCriticWrongAttempt.yaml
ADDED
@@ -0,0 +1,89 @@
name: "CodeCriticWrongAttempt_Flow"
verbose: True
description: "ToDO: add description"

model_name: "gpt-4"
generation_parameters:
  n: 1
  max_tokens: 3000
  temperature: 0.3

  model_kwargs:
    top_p: 0.2
    frequency_penalty: 0
    presence_penalty: 0

system_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: |2-
    Your goal is to identify the issues with an incorrect competitive programming solution attempt.

    The user will specify the problem by providing you with:
    - the problem statement
    - example test cases
    - the constraints of the problem
    - python stub code
    - an incorrect Python solution attempt and a description of its issue

    Crucially, your goal is to consider all aspects of the problem and pinpoint the issues with the solution attempt, and not to provide the code implementation yourself.
    Some aspects to consider: Is the input correctly parsed? Is the output correctly formatted? Are the corner cases correctly handled? Is there a logical mistake with the algorithm itself?
    Use the code execution results provided in the issue description to guide your reasoning/debugging.
  input_variables: []
  template_format: jinja2

human_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: "{{query}}"
  input_variables:
    - "query"
  template_format: jinja2

query_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: |2-
    # Problem statement
    {{problem_description}}

    {{io_description}}

    # Constraints
    {{constraints}}

    The code should extend the following stub:
    ```python
    {{python_stub}}
    ```

    # Solution attempt to be fixed
    ```python
    {{code}}
    ```

    {{testing_results_summary}}


    Consider the problem statement, the solution attempt and the issue. Why is the solution attempt incorrect? How should it be fixed? Explain your reasoning very concisely, and do not provide code.
  input_variables:
    - "problem_description"
    - "io_description"
    - "constraints"
    - "python_stub"
    - "code"
    - "testing_results_summary"
  template_format: jinja2

input_data_transformations: []
input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"
  - "python_stub"
  - "testing_results_summary"
  - "code"

output_data_transformations:
  - _target_: flows.data_transformations.KeyRename
    old_key2new_key:
      raw_response: "code_feedback"
output_keys:
  - "code_feedback"
LC_CodeCriticWrongAttemptWithPlan.py
ADDED
@@ -0,0 +1,6 @@
from martinjosifoski.OpenAIChatAtomicFlow import OpenAIChatAtomicFlow


class LC_CodeCriticWrongAttemptWithPlan(OpenAIChatAtomicFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
LC_CodeCriticWrongAttemptWithPlan.yaml
ADDED
@@ -0,0 +1,97 @@
name: "CodeCriticWrongAttemptWithPlan_Flow"
verbose: True
description: "ToDO: add description"

model_name: "gpt-4"
generation_parameters:
  n: 1
  max_tokens: 3000
  temperature: 0.3

  model_kwargs:
    top_p: 0.2
    frequency_penalty: 0
    presence_penalty: 0

system_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: |2-
    Your goal is to identify the issues with an incorrect competitive programming solution attempt.

    The user will specify the problem by providing you with:
    - the problem statement
    - example test cases
    - the constraints of the problem
    - python stub code
    - an incorrect Python solution attempt and a description of its issue

    Additionally, the user will provide you with a conceptual solution to the problem which should guide your reasoning.

    Crucially, your goal is to consider all aspects of the problem and pinpoint the issues with the solution attempt, and not to provide the code implementation yourself.
    Some aspects to consider: Is the input correctly parsed? Is the output correctly formatted? Is the code implementation consistent with the conceptual solution? Are the corner cases correctly handled? Is there a logical mistake with the algorithm itself?
    Use the code execution results provided in the issue description to guide your reasoning/debugging.
  input_variables: []
  template_format: jinja2

human_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: "{{query}}"
  input_variables:
    - "query"
  template_format: jinja2

query_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: |2-
    # Problem statement
    {{problem_description}}

    {{io_description}}

    # Constraints
    {{constraints}}

    The code should extend the following stub:
    ```python
    {{python_stub}}
    ```

    # Conceptual solution
    {{plan}}


    # Solution attempt to be fixed
    ```python
    {{code}}
    ```

    {{testing_results_summary}}


    Consider the problem statement, the conceptual solution, the code implementation and the issue. Why is the solution attempt incorrect? How should it be fixed? Explain your reasoning very concisely, and do not provide code.
  input_variables:
    - "problem_description"
    - "io_description"
    - "constraints"
    - "python_stub"
    - "plan"
    - "code"
    - "testing_results_summary"
  template_format: jinja2

input_data_transformations: []
input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"
  - "python_stub"
  - "testing_results_summary"
  - "plan"
  - "code"

output_data_transformations:
  - _target_: flows.data_transformations.KeyRename
    old_key2new_key:
      raw_response: "code_feedback"
output_keys:
  - "code_feedback"
LC_CodeDebug.py
ADDED
@@ -0,0 +1,12 @@
from flows.base_flows import GeneratorCriticFlow


class LC_CodeDebug(GeneratorCriticFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _early_exit(self):
        if self.flow_state.get("all_tests_passed", False):
            return True

        return super()._early_exit()
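LC_CodeDebug (and the other debug flows in this PR) override `_early_exit` so that the generator-critic loop stops as soon as the testing critic sets `all_tests_passed` in the flow state. The toy loop below only illustrates that control-flow pattern; the class and method names are stand-ins and do not reproduce the real GeneratorCriticFlow API.

    # Illustrative stand-in, not the flows library.
    class ToyDebugLoop:
        def __init__(self, max_rounds=2):
            self.max_rounds = max_rounds
            self.flow_state = {}

        def _early_exit(self):
            # Same check as LC_CodeDebug above.
            return self.flow_state.get("all_tests_passed", False)

        def run(self, generate, critique):
            for _ in range(self.max_rounds):
                code = generate(self.flow_state)
                self.flow_state.update(critique(code))
                if self._early_exit():
                    break
            return self.flow_state

    final_state = ToyDebugLoop().run(
        generate=lambda state: "def solve():\n    return 42",
        critique=lambda code: {"all_tests_passed": True},
    )
    print(final_state["all_tests_passed"])  # True: the loop exits after the first round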
LC_CodeDebug.yaml
ADDED
@@ -0,0 +1,64 @@
name: "CodeDebug_Flow"
verbose: True
description: "ToDO: add description"

reset_generator_every_round: False
reset_critic_every_round: True
max_rounds: 2 # ToDo: To increase to 4
early_exit_key: "end_of_interaction"

input_data_transformations: []
input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"
  - "python_stub"
  - "public_tests_individual_io"

output_data_transformations:
  - _target_: flows.data_transformations.KeyRename
    old_key2new_key:
      raw_response.code: "code"
output_keys:
  - "code"

subflows_config:
  - _target_: martinjosifoski.CC_flows.LC_Code.instantiate_from_default_config
    overrides:
      name: "CodeGenerator"
      model_name: "gpt-4"
      human_message_prompt_template:
        template: |2-
          {{testing_results_summary}}


          Consider the problem statement, the last proposed solution, and its issue. Provide a corrected version of the code that solves the original problem and resolves the issue, without any explanation, in the following format:
          ```python
          {{code_placeholder}}
          ```
        input_variables:
          - testing_results_summary
        partial_variables:
          code_placeholder: "{{python_code}}"
      default_human_input_keys:
        - "testing_results_summary"
        - "all_tests_passed"
      output_data_transformations:
        - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
          regex: '(?<=```python)([\s\S]*?)(?=```)'
          regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
          input_key: "raw_response"
          output_key: "code"
          strip: True
          assert_unique: True
          verbose: True
        - _target_: flows.data_transformations.EndOfInteraction
          end_of_interaction_string: "Final answer"
          output_key: "end_of_interaction"
          verbose: True
      output_keys:
        - "code"
        - "end_of_interaction"
  - _target_: martinjosifoski.CC_flows.LC_CodeTesting.instantiate_from_default_config
    overrides:
      name: "CodeTestingCritic"
LC_CodeDebugCollab.py
ADDED
@@ -0,0 +1,12 @@
from flows.base_flows import GeneratorCriticFlow


class LC_CodeDebugCollab(GeneratorCriticFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _early_exit(self):
        if self.flow_state.get("all_tests_passed", False):
            return True

        return super()._early_exit()
LC_CodeDebugCollab.yaml
ADDED
@@ -0,0 +1,68 @@
name: "CodeDebugCollab_Flow"
verbose: True
description: "ToDO: add description"

reset_generator_every_round: False
reset_critic_every_round: True
max_rounds: 2 # ToDo: To increase to 4
early_exit_key: "end_of_interaction"

input_data_transformations: []
input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"
  - "python_stub"
  - "public_tests_individual_io"

output_data_transformations:
  - _target_: flows.data_transformations.KeyRename
    old_key2new_key:
      raw_response.code: "code"
output_keys:
  - "code"

subflows_config:
  - _target_: martinjosifoski.CC_flows.LC_Code.instantiate_from_default_config
    overrides:
      name: "CodeGenerator"
      model_name: "gpt-4"
      human_message_prompt_template:
        _target_: langchain.PromptTemplate
        template: |2-
          {{testing_results_summary}}

          {{code_feedback}}


          Consider the problem statement, the last proposed solution, its issue and the provided feedback. Return a corrected version of the code that solves the original problem and resolves the issue, without any explanation, in the following format:
          ```python
          {{code_placeholder}}
          ```
        input_variables:
          - code_feedback
          - testing_results_summary
        partial_variables:
          code_placeholder: "{{python_code}}"
        template_format: jinja2
      default_human_input_keys:
        - "code_feedback"
        - "testing_results_summary"
      output_data_transformations:
        - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
          regex: '(?<=```python)([\s\S]*?)(?=```)'
          regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
          input_key: "raw_response"
          output_key: "code"
          strip: True
          assert_unique: True
          verbose: True
        - _target_: flows.data_transformations.EndOfInteraction
          end_of_interaction_string: "Final answer"
          output_key: "end_of_interaction"
          verbose: True
      output_keys:
        - "code"
        - "end_of_interaction"
  - _target_: martinjosifoski.CC_flows.LC_CodeDebugCritic.instantiate_from_default_config
LC_CodeDebugCollabWithPlan.py
ADDED
@@ -0,0 +1,12 @@
from flows.base_flows import GeneratorCriticFlow


class LC_CodeDebugCollabWithPlan(GeneratorCriticFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _early_exit(self):
        if self.flow_state.get("all_tests_passed", False):
            return True

        return super()._early_exit()
LC_CodeDebugCollabWithPlan.yaml
ADDED
@@ -0,0 +1,68 @@
name: "CodeDebugCollabWithPlan_Flow"
verbose: True
description: "ToDO: add description"

reset_generator_every_round: False
reset_critic_every_round: True
max_rounds: 2 # ToDo: To increase to 4
early_exit_key: "end_of_interaction"

input_data_transformations: []
input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"
  - "python_stub"
  - "public_tests_individual_io"
  - "plan"

output_data_transformations:
  - _target_: flows.data_transformations.KeyRename
    old_key2new_key:
      raw_response.code: "code"
output_keys:
  - "code"

subflows_config:
  - _target_: martinjosifoski.CC_flows.LC_CodeWithPlan.instantiate_from_default_config
    overrides:
      name: "CodeGenerator"
      model_name: "gpt-4"
      human_message_prompt_template:
        _target_: langchain.PromptTemplate
        template: |2-
          {{testing_results_summary}}

          {{code_feedback}}


          Consider the problem statement, the last proposed solution, its issue and the provided feedback. Return a corrected version of the code that solves the original problem and resolves the issue, without any explanation, in the following format:
          ```python
          {{code_placeholder}}
          ```
        input_variables:
          - code_feedback
          - testing_results_summary
        partial_variables:
          code_placeholder: "{{python_code}}"
        template_format: jinja2
      default_human_input_keys:
        - "code_feedback"
        - "testing_results_summary"
      output_data_transformations:
        - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
          regex: '(?<=```python)([\s\S]*?)(?=```)'
          regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
          input_key: "raw_response"
          output_key: "code"
          strip: True
          assert_unique: True
          verbose: True
        - _target_: flows.data_transformations.EndOfInteraction
          end_of_interaction_string: "Final answer"
          output_key: "end_of_interaction"
          verbose: True
      output_keys:
        - "code"
        - "end_of_interaction"
  - _target_: martinjosifoski.CC_flows.LC_CodeDebugCriticWithPlan.instantiate_from_default_config
LC_CodeDebugCritic.py
ADDED
@@ -0,0 +1,13 @@
from flows.base_flows import SequentialFlow


class LC_CodeDebugCritic(SequentialFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _early_exit(self):
        if self.flow_state.get("all_tests_passed", False):
            self.flow_state["code_feedback"] = None
            return True

        return super()._early_exit()
LC_CodeDebugCritic.yaml
ADDED
@@ -0,0 +1,31 @@
name: "CodeDebugCritic_Flow"
verbose: True
description: "ToDo: add description"

early_exit_key: null

input_data_transformations: []
input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"
  - "python_stub"
  - "public_tests_individual_io"
  - "code"

output_data_transformations:
  - _target_: flows.data_transformations.KeyRename
    old_key2new_key:
      raw_response.testing_results_summary: "testing_results_summary"
      raw_response.all_tests_passed: "all_tests_passed"
      raw_response.code_feedback: "code_feedback"
output_keys:
  - "testing_results_summary"
  - "all_tests_passed"
  - "code_feedback"

subflows_config:
  - _target_: martinjosifoski.CC_flows.LC_CodeTesting.instantiate_from_default_config
    overrides:
      name: "CodeTestingCritic"
  - _target_: martinjosifoski.CC_flows.LC_CodeCriticWrongAttempt.instantiate_from_default_config
LC_CodeDebugCriticWithPlan.py
ADDED
@@ -0,0 +1,13 @@
from flows.base_flows import SequentialFlow


class LC_CodeDebugCriticWithPlan(SequentialFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _early_exit(self):
        if self.flow_state.get("all_tests_passed", False):
            self.flow_state["code_feedback"] = None
            return True

        return super()._early_exit()
LC_CodeDebugCriticWithPlan.yaml
ADDED
@@ -0,0 +1,34 @@
name: "CodeDebugCriticWithPlan_Flow"
verbose: True
description: "ToDo: add description"

early_exit_key: null

input_data_transformations: []
input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"
  - "python_stub"
  - "public_tests_individual_io"
  - "code"
  - "plan"

output_data_transformations:
  - _target_: flows.data_transformations.KeyRename
    old_key2new_key:
      raw_response.testing_results_summary: "testing_results_summary"
      raw_response.all_tests_passed: "all_tests_passed"
      raw_response.code_feedback: "code_feedback"
output_keys:
  - "testing_results_summary"
  - "all_tests_passed"
  - "code_feedback"

subflows_config:
  - _target_: martinjosifoski.CC_flows.CF_CodeCritic.instantiate_from_default_config
    overrides:
      name: "CodeTestingCritic"
  - _target_: flows.flow_verse.instantiate_flow
    repository_id: ${oc.env:CC_FLOWS}
    class_name: LC_CodeCriticWrongAttemptWithPlan
LC_CodeReflect.py
ADDED
@@ -0,0 +1,6 @@
from flows.base_flows import GeneratorCriticFlow


class LC_CodeReflect(GeneratorCriticFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
LC_CodeReflect.yaml
ADDED
@@ -0,0 +1,49 @@
name: "CodeReflect_Flow"
verbose: True
description: "ToDO: add description"

reset_generator_every_round: False
reset_critic_every_round: True
max_rounds: 2 # ToDo: To increase to 4
early_exit_key: "end_of_interaction"

input_data_transformations: []
input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"
  - "python_stub"

output_data_transformations:
  - _target_: flows.data_transformations.KeyRename
    old_key2new_key:
      raw_response.code: "code"
output_keys:
  - "code"

subflows_config:
  - _target_: martinjosifoski.CC_flows.LC_Code.instantiate_from_default_config
    overrides:
      name: "CodeGenerator"
      model_name: "gpt-4"
      input_data_transformations:
        - _target_: flows.data_transformations.KeyRename
          old_key2new_key:
            code_reflect_message: "query"
      output_data_transformations:
        - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
          regex: '(?<=```python)([\s\S]*?)(?=```)'
          regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
          output_key: "code"
          strip: True
          assert_unique: True
          verbose: True
        - _target_: flows.data_transformations.EndOfInteraction
          end_of_interaction_string: "Final answer"
          output_key: "end_of_interaction"
          verbose: True
      output_keys:
        - "code"
        - "end_of_interaction"

  - _target_: martinjosifoski.CC_flows.FixedReply_CodeReflect.instantiate_from_default_config
LC_CodeTesting.py
ADDED
@@ -0,0 +1,33 @@
from typing import Any, Dict

from flows.utils import logging
from .src.evaluation import testing_utils_leetcode
from .CodeTesting import CodeTesting

log = logging.get_logger(__name__)

# ToDo: Add a flags to control whether hidden, public or both tests should be used for evaluation


class LC_CodeTesting(CodeTesting):
    REQUIRED_KEYS_CONFIG = []
    REQUIRED_KEYS_KWARGS = []

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def _get_test_data(self, input_data: Dict):
        """This function retrieves (or generates) input-output pairs that will be used to test the implementation."""
        return input_data["public_tests_individual_io"]

    def _run_tests(self, input_data: Dict, test_data: Dict) -> Dict[str, Any]:
        testing_results = testing_utils_leetcode.evaluate_solution_for_problem(
            candidate_solution=input_data["code"],
            python_stub=input_data["python_stub"],
            public_tests_io=test_data
        )

        for test_output in testing_results["public_tests_results"]:
            test_output["input"] = "\n".join(test_output["input"])

        return testing_results
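A small self-contained illustration of the post-processing step in `_run_tests` above: each public-test record comes back with `input` as a list of lines, and the flow joins them into a single string so the summary templates in LC_CodeTesting.yaml can render the failing input directly. The sample record is hypothetical; only the `input` field is visible in the diff.

    testing_results = {
        "public_tests_results": [
            {"input": ["3", "1 2 3"]},  # hypothetical record with the test input split into lines
        ]
    }

    for test_output in testing_results["public_tests_results"]:
        test_output["input"] = "\n".join(test_output["input"])

    print(repr(testing_results["public_tests_results"][0]["input"]))  # '3\n1 2 3'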
LC_CodeTesting.yaml
ADDED
@@ -0,0 +1,90 @@
name: "CodeTesting_Flow"
description: "ToDo: add description"

input_data_transformations: []
input_keys:
  - "code"
  - "python_stub"
  - "public_tests_individual_io"

output_keys:
  - "all_tests_passed"
  - "testing_results_summary"
output_data_transformations:
  - _target_: .src.data_transformations.CorrectnessFlag
    input_key: "raw_response.public_tests_results"
    output_key: "all_tests_passed"
  - _target_: .src.data_transformations.TestingResultsSummaryGeneration
    output_key: "testing_results_summary"

    single_test_error_message: True

    no_error_template: |2-
      ${.issue_title}
      All of the executed tests passed.

    compilation_error_template: |2-
      ${.issue_title}
      The execution resulted in a compilation error.
      ## Compilation error message:
      {{error_message}}
    timeout_error_template: |2-
      ${.issue_title}
      The execution timed out, the solution is not efficient enough.
    runtime_error_template: |2-
      ${.issue_title}
      The execution resulted in a runtime error on the following test.
      ## [Failed test] Input
      ```
      {{test_input}}
      ```
      ## [Failed test] Runtime error message
      {{error_message}}
    single_test_error_template: |2-
      ${.issue_title}
      The Python code does not solve the problem in the problem description due to logical errors. It fails the following test:
      ## [Failed test] Input
      ```
      {{test_input}}
      ```
      ## [Failed test] Expected output
      ```
      {{expected_output}}
      ```
      ## [Failed test] Generated output
      ```
      {{generated_output}}
      ```
    all_tests_header: |2-
      ${.issue_title}
      The Python code does not solve the problem in the problem description due to logical errors. It fails on the following tests.
    test_error_template: |2-
      ## [Failed test {{idx}}]
      ### [Failed test {{idx}}] Input
      ```
      {{test_input}}
      ```
      ### [Failed test {{idx}}] Expected output
      ```
      {{expected_output}}
      ```
      ### [Failed test {{idx}}] Generated output
      ```
      {{generated_output}}
      ```
    tests_separator: "\n\n"

    issue_title: "# Issue with the last proposed solution"

    feedback_title: "# Feedback on the last proposed solution"

    no_code_template: |2-
      ${.feedback_title}
      The code was not provided in the correct output format specified in the request or it was not provided at all.
    feedback_only_template: |2-
      ${.feedback_title}
      {{feedback_content}}
    feedback_and_issue_template: |2-
      {{issue_description}}

      {{feedback_content}}
LC_CodeWithPlan.py
ADDED
@@ -0,0 +1,6 @@
from martinjosifoski.OpenAIChatAtomicFlow import OpenAIChatAtomicFlow


class LC_CodeWithPlan(OpenAIChatAtomicFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
LC_CodeWithPlan.yaml
ADDED
@@ -0,0 +1,92 @@
name: "CodeFlowWithPlan_Flow"
verbose: True
description: "ToDO: add description"

model_name: "gpt-4"
generation_parameters:
  n: 1
  max_tokens: 3000
  temperature: 0.3

  model_kwargs:
    top_p: 0.2
    frequency_penalty: 0
    presence_penalty: 0

system_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: |2-
    Your goal is to provide executable Python code that solves a coding interview problem. The code should correctly handle all corner cases in order to pass the hidden test cases, which are used to evaluate the correctness of the solution.

    The user will specify the problem by providing you with:
    - the problem statement
    - example test cases
    - the constraints of the problem

    Additionally, the user will provide you with a conceptual solution to the problem which should guide your reasoning and the code implementation.

    The user will provide you with a task and an output format that you will strictly follow.
  input_variables: []
  template_format: jinja2

human_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: "{{query}}"
  input_variables:
    - "query"
  template_format: jinja2

query_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: |2-
    # Problem statement
    {{problem_description}}

    {{io_description}}

    # Constraints
    {{constraints}}

    # Conceptual solution
    {{plan}}


    Return Python code that solves the problem. The code should extend the following stub:
    ```python
    {{python_stub}}
    ```

    without changing the method signatures.
    Reply in the following format:
    ```python
    {{code_placeholder}}
    ```
    ```
  input_variables:
    - "problem_description"
    - "io_description"
    - "constraints"
    - "python_stub"
    - "plan"
  partial_variables:
    code_placeholder: "{{python_code}}"
  template_format: jinja2

input_data_transformations: []
input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"
  - "python_stub"
  - "plan"

output_data_transformations:
  - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
    regex: '(?<=```python)([\s\S]*?)(?=```)'
    regex_fallback: '(?<=```)([\s\S]*?)(?=```)'
    output_key: "code"
    strip: True
    assert_unique: True
    verbose: True
output_keys:
  - "code"
LC_Plan.py
ADDED
@@ -0,0 +1,6 @@
from martinjosifoski.OpenAIChatAtomicFlow import OpenAIChatAtomicFlow


class LC_Plan(OpenAIChatAtomicFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
LC_Plan.yaml
ADDED
@@ -0,0 +1,80 @@
name: "Plan_Flow"
verbose: True
description: "ToDO: add description"

model_name: "gpt-4"
generation_parameters:
  n: 1
  max_tokens: 3000
  temperature: 0.3

  model_kwargs:
    top_p: 0.2
    frequency_penalty: 0
    presence_penalty: 0

input_data_transformations: []
input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"

output_data_transformations:
  - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
    regex: '(?<=Conceptual solution)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
    regex_fallback:
      - '(?<=Conceptual solution:)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
    output_key: "plan"
    strip: True
    assert_unique: True
    verbose: True
output_keys:
  - "plan"

system_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: |2-
    Your goal is to provide a high-level conceptual solution that, if implemented, will solve a given coding interview problem.

    The user will specify the problem by providing you with:
    - the problem statement
    - example test cases
    - the constraints of the problem

    The proposed algorithm should be computationally efficient, logically correct and handle all corner cases.

    The user will provide you with a task and an output format that you will strictly follow.
  input_variables: []
  template_format: jinja2

human_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: "{{query}}"
  input_variables:
    - "query"
  template_format: jinja2

query_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: |2-
    # Problem statement
    {{problem_description}}

    {{io_description}}

    # Constraints
    {{constraints}}


    Return a high-level conceptual solution that would solve the problem. Be very concise, and do not provide code.
    Reply in the following format:
    # Conceptual solution
    {{plan_placeholder}}
  input_variables:
    - "problem_description"
    - "io_description"
    - "constraints"
  partial_variables:
    plan_placeholder: "{{conceptual_solution}}"
  template_format: jinja2
LC_PlanCollab.py
ADDED
@@ -0,0 +1,6 @@
from flows.base_flows import GeneratorCriticFlow


class LC_PlanCollab(GeneratorCriticFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
LC_PlanCollab.yaml
ADDED
@@ -0,0 +1,69 @@
name: "PlanCollab_Flow"
verbose: True
description: "ToDO: add description"

reset_generator_every_round: False
reset_critic_every_round: True
max_rounds: 2 # ToDo: To increase to 4
early_exit_key: "end_of_interaction"

input_data_transformations: []
input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"
  - "python_stub"

output_data_transformations:
  - _target_: flows.data_transformations.KeyRename
    old_key2new_key:
      raw_response.plan: "plan"
output_keys:
  - "plan"

subflows_config:
  - _target_: martinjosifoski.CC_flows.LC_Plan.instantiate_from_default_config
    overrides:
      name: "PlanGenerator"
      human_message_prompt_template:
        _target_: langchain.PromptTemplate
        template: |2-
          # Feedback on the last proposed conceptual solution
          {{plan_feedback}}


          Consider the original problem statement, the last proposed solution and the provided feedback. Does the solution need to be updated? If so, provide the corrected version of the conceptual solution in the following format:
          # Conceptual solution
          {{plan_placeholder}}
          otherwise, reply:
          "Final answer."
        input_variables:
          - plan_feedback
        partial_variables:
          plan_placeholder: "{{conceptual_solution}}"
        template_format: jinja2
      default_human_input_keys:
        - "plan_feedback"
      output_data_transformations:
        - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
          regex: '(?<=Conceptual solution)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
          regex_fallback:
            - '(?<=Conceptual solution:)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
          output_key: "plan"
          strip: True
          assert_unique: True
          verbose: True
        - _target_: flows.data_transformations.EndOfInteraction
          end_of_interaction_string: "Final answer"
          output_key: "end_of_interaction"
          verbose: True
      output_keys:
        - "plan"
        - "end_of_interaction"
  - _target_: martinjosifoski.CC_flows.LC_PlanCritic.instantiate_from_default_config
    overrides:
      name: PlanCritic
      output_data_transformations:
        - _target_: flows.data_transformations.KeyRename
          old_key2new_key:
            raw_response: "plan_feedback"
LC_PlanCollab_Code.py
ADDED
@@ -0,0 +1,6 @@
from flows.base_flows import SequentialFlow


class LC_PlanCollab_Code(SequentialFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
LC_PlanCollab_Code.yaml
ADDED
@@ -0,0 +1,22 @@
name: "PlanCollab_Code_Flow"
description: "ToDO: add description"

early_exit_key: null

input_data_transformations: []
input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"
  - "python_stub"

output_data_transformations:
  - _target_: flows.data_transformations.KeyRename
    old_key2new_key:
      raw_response.code: "code"
output_keys:
  - "code"

subflows_config:
  - _target_: martinjosifoski.CC_flows.LC_PlanCollab.instantiate_from_default_config
  - _target_: martinjosifoski.CC_flows.LC_CodeWithPlan.instantiate_from_default_config
LC_PlanCritic.py
ADDED
@@ -0,0 +1,6 @@
from martinjosifoski.OpenAIChatAtomicFlow import OpenAIChatAtomicFlow


class LC_PlanCritic(OpenAIChatAtomicFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
LC_PlanCritic.yaml
ADDED
@@ -0,0 +1,77 @@
name: "PlanCritic_Flow"
verbose: True
description: "ToDO: add description"

model_name: "gpt-4"
generation_parameters:
  n: 1
  max_tokens: 3000
  temperature: 0.3

  model_kwargs:
    top_p: 0.2
    frequency_penalty: 0
    presence_penalty: 0

system_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: |2-
    Your goal is to identify potential issues with a conceptual solution to a given competitive programming problem.

    The user will specify the problem by providing you with:
    - the problem statement
    - example test cases
    - the constraints of the problem
    - a conceptual solution attempt

    Crucially, your goal is to consider all aspects of the problem and pinpoint potential issues with the conceptual solution attempt (if any), and not to provide the conceptual solution or the code implementation yourself.
    Some aspects to consider: Are there any logical mistakes with the proposed algorithm? Are the corner cases correctly handled?
    The user will provide you with a task and an output format that you will strictly follow.
  input_variables: []
  template_format: jinja2

human_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: "{{query}}"
  input_variables:
    - "query"
  template_format: jinja2

query_message_prompt_template:
  _target_: langchain.PromptTemplate
  template: |2-
    # Problem statement
    {{problem_description}}

    {{io_description}}

    # Constraints
    {{constraints}}

    # code stub
    ```python
    {{python_stub}}
    ```

    # Conceptual solution attempt
    {{plan}}


    Consider the problem statement and the solution attempt. Are there any issues with the proposed conceptual solution or it is correct? Explain your reasoning very concisely.
  input_variables:
    - "problem_description"
    - "io_description"
    - "constraints"
    - "python_stub"
    - "plan"
  template_format: jinja2

input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"
  - "python_stub"
  - "plan"

output_keys:
  - "plan_feedback"
LC_PlanReflect.py
ADDED
@@ -0,0 +1,6 @@
from flows.base_flows import GeneratorCriticFlow


class LC_PlanReflect(GeneratorCriticFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
LC_PlanReflect.yaml
ADDED
@@ -0,0 +1,63 @@
name: "PlanReflect_Flow"
verbose: True
description: "ToDO: add description"

reset_generator_every_round: False
reset_critic_every_round: True
max_rounds: 2 # ToDo: increase to 4
early_exit_key: "end_of_interaction"

input_data_transformations: []
input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"

output_data_transformations:
  - _target_: flows.data_transformations.KeyRename
    old_key2new_key:
      raw_response.plan: "plan"
output_keys:
  - "plan"

subflows_config:
  - _target_: martinjosifoski.CC_flows.LC_Plan.instantiate_from_default_config
    overrides:
      name: "PlanGenerator_Flow"
      model_name: "gpt-4"
      input_data_transformations:
        - _target_: flows.data_transformations.KeyRename
          old_key2new_key:
            plan_reflect_message: "query"
      output_data_transformations:
        - _target_: flows.data_transformations.RegexFirstOccurrenceExtractor
          regex: '(?<=Conceptual solution)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
          regex_fallback:
            - '(?<=Conceptual solution:)([\s\S]*?)(?=\n\n# [A-Z]|\Z)'
          output_key: "plan"
          strip: True
          assert_unique: True
          verbose: True
        - _target_: flows.data_transformations.EndOfInteraction
          end_of_interaction_string: "Final answer"
          output_key: "end_of_interaction"
          verbose: True
      output_keys:
        - "plan"
        - "end_of_interaction"

  - _target_: martinjosifoski.CC_flows.FixedReply_PlanReflect.instantiate_from_default_config
    overrides:
      name: "PlanFixedReplyCritic"
      description: "ToDo: Add description"
      input_keys:
        - "plan"
      output_keys:
        - "query"
      fixed_reply: |2-
        Consider the problem statement and the last proposed solution. Are you sure that the solution is provided in the requested format, and crucially, solves the problem?
        If that is not the case, provide the corrected version of the conceptual solution in the following format:
        # Conceptual solution
        {{conceptual_solution}}
        otherwise, reply:
        "Final answer."
LC_PlanReflect_Code.py
ADDED
@@ -0,0 +1,6 @@
from flows.base_flows import SequentialFlow


class LC_PlanReflect_Code(SequentialFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
LC_PlanReflect_Code.yaml
ADDED
@@ -0,0 +1,15 @@
name: "PlanReflect_Code_Flow"
description: "ToDO: add description"

input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"
  - "python_stub"

output_keys:
  - "code"

subflows_config:
  - _target_: martinjosifoski.CC_flows.LC_PlanReflect.instantiate_from_default_config
  - _target_: martinjosifoski.CC_flows.LC_CodeWithPlan.instantiate_from_default_config
LC_Plan_Code.py
ADDED
@@ -0,0 +1,6 @@
from flows.base_flows import SequentialFlow


class LC_Plan_Code(SequentialFlow):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
LC_Plan_Code.yaml
ADDED
@@ -0,0 +1,26 @@
name: "Plan_Code_Flow"
description: "ToDO: add description"

early_exit_key: null

input_data_transformations: []
input_keys:
  - "problem_description"
  - "io_description"
  - "constraints"
  - "python_stub"

output_data_transformations:
  - _target_: flows.data_transformations.KeyRename
    old_key2new_key:
      raw_response.code: "code"
output_keys:
  - "code"

subflows_config:
  - _target_: martinjosifoski.CC_flows.LC_Plan.instantiate_from_default_config
    overrides:
      model_name: "gpt-4"
  - _target_: martinjosifoski.CC_flows.LC_CodeWithPlan.instantiate_from_default_config
    overrides:
      model_name: "gpt-4"
__init__.py
CHANGED
@@ -3,7 +3,6 @@ from flows import flow_verse
 
 dependencies = [
     {"url": "martinjosifoski/OpenAIChatAtomicFlow", "revision": "main"},
-    # {"url": os.getenv("CC_flows_repo"), "revision": "main"}
 ]
 flow_verse.sync_dependencies(dependencies)
 
@@ -48,3 +47,46 @@ from .CF_CodeDebugCollab import CF_CodeDebugCollab
 from .CF_CodeCriticWrongAttemptWithPlan import CF_CodeCriticWrongAttemptWithPlan
 from .CF_CodeDebugCriticWithPlan import CF_CodeDebugCriticWithPlan
 from .CF_CodeDebugCollabWithPlan import CF_CodeDebugCollabWithPlan
+
+
+########################## LC ##########################
+
+# lc-code
+from .LC_Code import LC_Code
+
+# lc-code_reflect
+from .LC_CodeReflect import LC_CodeReflect
+
+# lc-code_collab
+from .LC_CodeCritic import LC_CodeCritic
+from .LC_CodeCollab import LC_CodeCollab
+
+# lc-plan-code (and lc-plan_oracle-code)
+from .LC_Plan import LC_Plan
+from .LC_CodeWithPlan import LC_CodeWithPlan
+from .LC_Plan_Code import LC_Plan_Code
+
+# lc-plan_reflect-code
+from .LC_PlanReflect import LC_PlanReflect
+from .LC_PlanReflect_Code import LC_PlanReflect_Code
+
+# lc-plan_collab-code
+from .LC_PlanCritic import LC_PlanCritic
+from .LC_PlanCollab import LC_PlanCollab
+from .LC_PlanCollab_Code import LC_PlanCollab_Code
+
+# lc-code_debug
+from .LC_CodeTesting import LC_CodeTesting
+from .LC_CodeDebug import LC_CodeDebug
+
+# lc-code_debug_collab
+from .LC_CodeCriticWrongAttempt import LC_CodeCriticWrongAttempt
+from .LC_CodeDebugCritic import LC_CodeDebugCritic
+from .LC_CodeDebugCollab import LC_CodeDebugCollab
+
+# lc-plan_oracle-code_debug_collab
+from .LC_CodeCriticWrongAttemptWithPlan import LC_CodeCriticWrongAttemptWithPlan
+from .LC_CodeDebugCriticWithPlan import LC_CodeDebugCriticWithPlan
+from .LC_CodeDebugCollabWithPlan import LC_CodeDebugCollabWithPlan
+
+
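Note: with these exports in place, the LC flows are reachable through the `martinjosifoski.CC_flows` module path that the YAML `_target_` entries reference. Below is a hypothetical usage sketch; the argument-free call to `instantiate_from_default_config` and the surrounding setup are assumptions, not something this diff confirms.

```python
# Hypothetical sketch, assuming flow_verse has already synced the
# martinjosifoski dependencies as in __init__.py above.
from martinjosifoski.CC_flows import LC_Plan_Code

# instantiate_from_default_config is assumed to work without arguments here;
# LC_Plan_Code.yaml suggests it can also take config overrides.
flow = LC_Plan_Code.instantiate_from_default_config()

# The flow declares these input keys in LC_Plan_Code.yaml:
# "problem_description", "io_description", "constraints", "python_stub",
# and produces a single "code" output key.
```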
src/evaluation/testing_utils_codeforces.py
CHANGED
@@ -17,7 +17,7 @@ from pyext import RuntimeModule
 from wrapt_timeout_decorator import timeout as wrapt_timeout
 import threading
 
-from
+from ..datasets.schema import assert_test_format_codeforces
 
 from flows import logging
 
src/evaluation/testing_utils_leetcode.py
ADDED
@@ -0,0 +1,258 @@
+# This is based heavily on the huggingface APPS metric
+# to run the solution files we're using a timing based approach
+# for capturing the stdout
+# used for testing the code that reads from input
+import logging
+import re
+from subprocess import Popen, PIPE, TimeoutExpired
+from typing import List, Tuple
+import threading
+
+log = logging.getLogger(__name__)
+lock = threading.Lock()
+
+def evaluate_solution_for_problem(
+    candidate_solution,
+    python_stub,
+    hidden_tests_io=None,
+    public_tests_io=None,
+    timeout=10,
+    debug=False,
+    add_extra_imports=False,
+):
+    with lock:
+        """See the readme for the output format of this function."""
+        if hidden_tests_io is None:
+            hidden_tests_io = []
+        if public_tests_io is None:
+            public_tests_io = []
+
+        if candidate_solution is None:
+            results_dict = {
+                "compilation_status": False,
+                "compilation_error_message": "No code was provided.",
+                "timeout_error": False,
+                "hidden_tests_results": [
+                    {
+                        "status": False,
+                        "error_message": "No code was provided.",
+                        "generated_output": None,
+                        "input": test[0],
+                        "expected_output": test[1],
+                    }
+                    for test in hidden_tests_io
+                ],
+                "public_tests_results": [
+                    {
+                        "status": False,
+                        "error_message": "No code was provided.",
+                        "generated_output": None,
+                        "input": test[0],
+                        "expected_output": test[1],
+                    }
+                    for test in public_tests_io
+                ],
+            }
+            return results_dict
+
+        hidden_tests_results = check_correctness(
+            candidate_solution, python_stub, hidden_tests_io, timeout, debug, add_extra_imports
+        )
+        public_tests_results = check_correctness(
+            candidate_solution, python_stub, public_tests_io, timeout, debug, add_extra_imports
+        )
+
+        # the compilation status shouldn't depend on the tests
+        if len(hidden_tests_io) > 0 and len(public_tests_io) > 0:
+            assert hidden_tests_results["compilation_status"] == public_tests_results["compilation_status"]
+
+        compilation_status = True
+        error_message = None
+        timeout_error = False
+
+        if len(hidden_tests_io) > 0:
+            compilation_status = compilation_status and hidden_tests_results["compilation_status"]
+            error_message = hidden_tests_results["error_message"]
+            timeout_error = timeout_error or hidden_tests_results["timeout_error"]
+
+        if len(public_tests_io) > 0:
+            compilation_status = compilation_status and public_tests_results["compilation_status"]
+            error_message = public_tests_results["error_message"]
+            timeout_error = timeout_error or public_tests_results["timeout_error"]
+
+        results_dict = {
+            "compilation_status": compilation_status,
+            "compilation_error_message": error_message,
+            "timeout_error": timeout_error,
+            "hidden_tests_results": hidden_tests_results["results"],
+            "public_tests_results": public_tests_results["results"],
+        }
+
+        return results_dict
+
+
+def check_correctness(
+    candidate_solution: str,
+    python_stub: str,
+    tests: List[Tuple[List[str], str]],
+    timeout: int = 6000,
+    debug=True,
+    add_extra_imports=False,
+):
+    compilation_status = True
+    compilation_error = None
+    results = []
+    timeout_occurred = False
+
+    for idx, test in enumerate(tests):
+        inp, out, expl = test
+        result = one_test(
+            candidate_solution, python_stub, inp, out, timeout=timeout, debug=debug, add_extra_imports=add_extra_imports
+        )
+        error_message = result["error_message"]
+
+        if error_message is not None:
+            if "syntaxerror" in error_message.lower():
+                compilation_status = False
+                compilation_error = error_message
+            if "timeout" in error_message.lower():
+                timeout_occurred = True
+        results.append(result)
+
+        if timeout_occurred:
+            break
+
+    if timeout_occurred:
+        return {
+            "compilation_status": True,
+            "timeout_error": True,
+            "error_message": "Timeout error.",
+            "results": results,
+        }
+
+    return {
+        "compilation_status": compilation_status,
+        "timeout_error": False,
+        "error_message": compilation_error,
+        "results": results,
+    }
+
+
+def one_test(candidate_solution, python_stub, inp, out, timeout=10, debug=False, add_extra_imports=False):
+    python_stub = python_stub.strip()
+    candidate_solution = candidate_solution.strip()
+
+    out = out.replace("null", "None").replace("true", "True").replace("false", "False")
+
+    # reformat the solution and parse class and method name
+    class_def, signature = python_stub.split(" def ")
+    class_name = class_def.split("class ")[1].strip().rstrip(":")
+    func_name, _ = signature.split("(")
+
+    # reformatting the input
+    first_param = r"^\w+\s\=\s"
+    later_params = r",\s\w+\s\=\s"
+
+    inp = re.sub(first_param, "", inp)
+    inp = re.sub(later_params, ", ", inp)
+
+    # we add custom code to invoke the solution
+    before_output = "AFTER THIS COMES OUR OWN GENERATED OUTPUT !@#!@!"
+    after_output = "AFTER THIS COMES OUR VERDICT !@#!@!"
+
+    if add_extra_imports:
+        sol = f"""
+from collections import *
+from math import *
+import math
+from functools import *
+from heapq import *
+import heapq
+import itertools
+from itertools import *
+import bisect
+from bisect import *
+"""
+    else:
+        sol = ""
+
+    sol += f"""
+from typing import List, Tuple, Optional
+{candidate_solution}
+sfohsdfdsfjhsdkfjhsdkjfh = {class_name}()
+res = sfohsdfdsfjhsdkfjhsdkjfh.{func_name}({inp})
+
+def nested_list_convert(inp):
+    try:
+        try:
+            inp = list(inp)
+        except BaseException as e:
+            return inp
+        out = []
+        for i in inp:
+            out.append(nested_list_convert(i))
+    except BaseException as e:
+        return inp
+    return out
+
+matching = False
+matching = matching or res == {out}
+matching = matching or nested_list_convert(res) == {out}
+matching = matching or nested_list_convert(res) == nested_list_convert({out})
+matching = matching or str({out})==str(res).replace("{{","[").replace("(","[").replace("}}","]").replace(")","]")
+matching = matching or str({out})==str(res).replace("{{","[").replace("(","[").replace("}}","]").replace(")","]")
+print("res: ", res)
+print("out: ", {out})
+print("{before_output}")
+print(res)
+print("{after_output}")
+print(matching)
+"""
+
+    cmd = "python3"
+
+    proc = Popen([cmd, "-c", sol], stdin=PIPE, stdout=PIPE, stderr=PIPE)
+
+    result_object = {"input": inp, "expected_output": out.strip('"')}
+
+    try:
+        stdout, stderr = proc.communicate("", timeout=timeout)
+    except TimeoutExpired as e:
+        if debug:
+            log.info(f"Timeout error, timeout={timeout}")
+        result_object.update({"status": False, "error_message": "Timeout error.", "generated_output": None})
+        return result_object
+
+    finally:
+        proc.kill()
+
+    stdout = stdout.decode()
+    stderr = stderr.decode().lower()
+
+    if stderr == "":
+        # No compilation or runtime error
+        stderr = None
+    else:
+        # Runtime or compilation error (distinction is made by the presence of "syntaxerror" in the error message)
+        result_object.update(**{"status": False, "error_message": stderr, "generated_output": None})
+        return result_object
+
+    try:
+        generated_output = stdout.split(before_output)[1]
+        generated_output, verdict = generated_output.split(after_output)
+        result_object.update(
+            **{
+                "status": verdict.strip() == "True",
+                "error_message": stderr,
+                "generated_output": generated_output.strip(),
+            }
+        )
+        return result_object
+    except IndexError as e:
+        raise Exception(f"An unexpected error has occurred while parsing the following generated output: {stdout}")
+        # Used in debugging
+        # log.info(e)
+        # result_object.update(
+        #     **{"status": False, "error_message": "The output couldn't be parsed", "generated_output": None}
+        # )
+        # return result_object
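For reference, a small usage sketch of the new LeetCode evaluation helper. The import path, the problem stub, the candidate solution, and the test triple below are illustrative assumptions; only the call signature and the returned dictionary keys come from `testing_utils_leetcode.py` above (each test is an `(input, expected_output, explanation)` triple, as unpacked in `check_correctness`).

```python
# Hypothetical example data; only the call pattern and result keys are taken
# from testing_utils_leetcode.py above. The import path assumes the module is
# importable as a package from the repository root.
from src.evaluation.testing_utils_leetcode import evaluate_solution_for_problem

python_stub = """class Solution:
    def twoSum(self, nums: List[int], target: int) -> List[int]:"""

candidate_solution = """class Solution:
    def twoSum(self, nums: List[int], target: int) -> List[int]:
        seen = {}
        for i, x in enumerate(nums):
            if target - x in seen:
                return [seen[target - x], i]
            seen[x] = i"""

# Each test is an (input, expected_output, explanation) triple.
public_tests_io = [("nums = [2,7,11,15], target = 9", "[0, 1]", "2 + 7 == 9")]

results = evaluate_solution_for_problem(
    candidate_solution,
    python_stub,
    public_tests_io=public_tests_io,
)
print(results["compilation_status"])                 # True if the code ran cleanly
print(results["public_tests_results"][0]["status"])  # True if the output matched
```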