Spaces:

FireBird-Tech
/

auto-analyst-backend

Running on CPU Upgrade

App Files Files

Arslan1997 committed on Sep 16

Commit

463a6a7

1 Parent(s): f04f084

lefg

Browse files

Files changed (1) hide show

src/routes/code_routes.py +29 -118

src/routes/code_routes.py CHANGED Viewed

@@ -283,26 +283,17 @@ def extract_relevant_error_section(error_message: str) -> str:
 async def fix_code_with_dspy(code: str, error: str, dataset_context: str = "", datasets: dict = None):
     """
-    Fix code using DSPy with dataset context and actual datasets
     """
     try:
-        # Create score function with actual datasets
-        def create_score_code_with_datasets(datasets_dict):
-            def score_code_with_datasets(args, pred):
-                return score_code(args, pred, datasets=datasets_dict)  # Fixed: use datasets= instead of session_state_datasets=
-            return score_code_with_datasets
-        # Create refine_fixer with datasets
-        if datasets:
-            score_fn = create_score_code_with_datasets(datasets)
-        else:
-            score_fn = score_code  # Fallback to original function
         refine_fixer = dspy.Refine(
-            module=dspy.Predict(code_fix),
             N=3,
             threshold=1.0,
-            reward_fn=score_fn,
             fail_count=3
         )
@@ -311,115 +302,35 @@ async def fix_code_with_dspy(code: str, error: str, dataset_context: str = "", d
         if not anthropic_key:
             raise ValueError("ANTHROPIC_API_KEY environment variable is not set")
-        # Find the blocks with errors
-        faulty_blocks = identify_error_blocks(code, error)
-        if not faulty_blocks:
-            # If no specific errors found, fix the entire code using refine
-            try:
-                # Create the LM instance that will be used
-                # thread_lm = dspy.LM("anthropic/claude-3-5-sonnet-latest", api_key=anthropic_key, max_tokens=2500)
-                thread_lm = MODEL_OBJECTS['claude-3-5-sonnet-latest']
-                # Define the blocking function to run in thread
-                def run_refine_fixer():
-                    with dspy.context(lm=thread_lm):
-                        return refine_fixer(
-                            dataset_context=str(dataset_context) or "",
-                            faulty_code=str(code) or "",
-                            error=str(error) or "",
-                        )
-                # Use asyncio.to_thread for better async integration
-                result = await asyncio.to_thread(run_refine_fixer)
-                return result.fixed_code
-            except Exception as e:
-                logger.log_message(f"Error during refine code fixing: {str(e)}", level=logging.ERROR)
-                raise e
-        # Start with the original code
-        result_code = code.replace("```python", "").replace("```", "")
-        # Fix each faulty block separately using async refine
         try:
             thread_lm = MODEL_OBJECTS['claude-3-5-sonnet-latest']
-            for agent_name, block_code, specific_error in faulty_blocks:
-                try:
-                    # Extract inner code between the markers
-                    inner_code_match = re.search(r'#\s+\w+\s+code\s+start\s*\n([\s\S]*?)#\s+\w+\s+code\s+end', block_code)
-                    if not inner_code_match:
-                        continue
-                    inner_code = inner_code_match.group(1).strip()
-                    # Find markers
-                    start_marker_match = re.search(r'(#\s+\w+\s+code\s+start)', block_code)
-                    end_marker_match = re.search(r'(#\s+\w+\s+code\s+end)', block_code)
-                    if not start_marker_match or not end_marker_match:
-                        logger.log_message(f"Could not find start/end markers for {agent_name}", level=logging.WARNING)
-                        continue
-                    start_marker = start_marker_match.group(1)
-                    end_marker = end_marker_match.group(1)
-                    # Extract the error type and actual error message
-                    error_type = ""
-                    error_msg = specific_error
-                    # Look for common error patterns to provide focused context to the LLM
-                    error_type_match = re.search(r'(TypeError|ValueError|AttributeError|IndexError|KeyError|NameError):\s*([^\n]+)', specific_error)
-                    if error_type_match:
-                        error_type = error_type_match.group(1)
-                        error_msg = f"{error_type}: {error_type_match.group(2)}"
-                    # Add problem location if available
-                    if "Problem at this location:" in specific_error:
-                        problem_section = re.search(r'Problem at this location:([\s\S]*?)(?:\n\n|$)', specific_error)
-                        if problem_section:
-                            error_msg = f"{error_msg}\n\nProblem at: {problem_section.group(1).strip()}"
-                    # Define the blocking function to run in thread for this specific block
-                    def run_block_fixer():
-                        with dspy.context(lm=thread_lm):
-                            return refine_fixer(
-                                dataset_context=str(dataset_context) or "",
-                                faulty_code=str(inner_code) or "",
-                                error=str(error_msg) or "",
-                            )
-                    # Use asyncio.to_thread for better async integration
-                    result = await asyncio.to_thread(run_block_fixer)
-                    # Ensure the fixed code is properly stripped and doesn't include markers
-                    fixed_inner_code = result.fixed_code.strip()
-                    if fixed_inner_code.startswith('#') and 'code start' in fixed_inner_code:
-                        # If LLM included markers in response, extract only inner code
-                        inner_match = re.search(r'#\s+\w+\s+code\s+start\s*\n([\s\S]*?)#\s+\w+\s+code\s+end', fixed_inner_code)
-                        if inner_match:
-                            fixed_inner_code = inner_match.group(1).strip()
-                    # Reconstruct the block with fixed code
-                    fixed_block = f"{start_marker}\n\n{fixed_inner_code}\n\n{end_marker}"
-                    # Replace the original block with the fixed block in the full code
-                    result_code = result_code.replace(block_code, fixed_block)
-                except Exception as e:
-                    # Log the error but continue with other blocks
-                    logger.log_message(f"Error fixing {agent_name} block: {str(e)}", level=logging.ERROR)
-                    continue
         except Exception as e:
-            logger.log_message(f"Error during async code fixing: {str(e)}", level=logging.ERROR)
-            raise e
-        return result_code
     except Exception as e:
         logger.log_message(f"Error in fix_code_with_dspy: {str(e)}", level=logging.ERROR)
-        raise e
 def get_dataset_context(df):
     """

 async def fix_code_with_dspy(code: str, error: str, dataset_context: str = "", datasets: dict = None):
     """
+    Fix code using DSPy Refine with datasets-aware reward function
     """
     try:
+        # Wrap score_code to fix datasets argument
+        reward_fn_with_datasets = lambda args, pred: score_code(args, pred, datasets=datasets)
         refine_fixer = dspy.Refine(
+            module=dspy.Predict(code_fix),
             N=3,
             threshold=1.0,
+            reward_fn=reward_fn_with_datasets,
             fail_count=3
         )
         if not anthropic_key:
             raise ValueError("ANTHROPIC_API_KEY environment variable is not set")
+        # Fix the entire code using refine
         try:
+            # Create the LM instance that will be used
             thread_lm = MODEL_OBJECTS['claude-3-5-sonnet-latest']
+            # Define the blocking function to run in thread
+            def run_refine_fixer():
+                with dspy.context(lm=thread_lm):
+                    return refine_fixer(
+                        dataset_context=str(dataset_context) or "",
+                        faulty_code=str(code) or "",
+                        error=str(error) or "",
+                    )
+            # Use asyncio.to_thread for better async integration
+            result = await asyncio.to_thread(run_refine_fixer)
+            if not hasattr(result, 'fixed_code'):
+                raise ValueError("DSPy Refine did not return a result with 'fixed_code' attribute")
+            return result.fixed_code
         except Exception as e:
+            logger.log_message(f"Error during refine code fixing: {str(e)}", level=logging.ERROR)
+            raise RuntimeError(f"Code fixing failed: {str(e)}") from e
     except Exception as e:
         logger.log_message(f"Error in fix_code_with_dspy: {str(e)}", level=logging.ERROR)
+        raise RuntimeError(f"Fix code setup failed: {str(e)}") from e
 def get_dataset_context(df):
     """