Spaces:

cronjob-python
/

backend

Running

App Files Community

Soumik555 committed on Jul 14, 2025

Commit

fe7ef49

1 Parent(s): f50cd2e

added csv-agent-default in next.js

Browse files

Files changed (1) hide show

python_code_interpreter_service.py +55 -65

python_code_interpreter_service.py CHANGED Viewed

@@ -6,6 +6,7 @@ import os
 import base64
 from pathlib import Path
 import uuid
 import numpy as np
 import pandas as pd
 import matplotlib
@@ -26,7 +27,7 @@ import plotly.express as px
 import plotly.graph_objects as go
 from plotly.io import to_html
 import openpyxl
-print(openpyxl.__version__)
 def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
@@ -53,6 +54,7 @@ def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
     plot_base64 = []
     variables = {}
     html_charts = []
     # Monkey patch plt.show() to save figures
     original_show = plt.show
@@ -60,7 +62,6 @@ def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
     def custom_show():
         for i, fig in enumerate(plt.get_fignums()):
             figure = plt.figure(fig)
-            # Save plot to bytes buffer instead of file
             buf = io.BytesIO()
             figure.savefig(buf, format='png', bbox_inches='tight')
             buf.seek(0)
@@ -71,27 +72,51 @@ def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
     original_plotly_show = go.Figure.show
     def custom_plotly_show(fig, *args, **kwargs):
-        # Generate unique filename
         chart_id = str(uuid.uuid4())
         filename = f"chart_{chart_id}.html"
         filepath = charts_dir / filename
-        # Save as HTML
         html = to_html(fig, include_plotlyjs='cdn')
         with open(filepath, 'w', encoding='utf-8') as f:
             f.write(html)
-        # Add to html_charts list
         html_charts.append(filename)
-        # Close the figure to free memory
-        fig._grid_ref = None  # Help with memory cleanup
         return None
     try:
-        # Create a comprehensive execution context with all common data science libraries
         exec_globals = {
-            # Core libraries
             'np': np,
             'pd': pd,
             'plt': plt,
@@ -100,34 +125,25 @@ def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
             'stats': stats,
             'sklearn': sklearn,
             'tabulate': tabulate,
-            'openpyxl': openpyxl,
-            # Plotly libraries
             'px': px,
             'go': go,
-            # Date/time libraries
             'datetime': datetime,
             'parser': parser,
             'pytz': pytz,
-            # Utility
             'os': os,
             'sys': sys,
             'warnings': warnings,
             'json': json,
-            # File paths
             'DATA_DIR': data_dir,
             'CHARTS_DIR': charts_dir,
-            # Provided DataFrame
             'df': df,
             '__builtins__': __builtins__,
         }
-        # Add common sklearn components
         from sklearn import (
             datasets, preprocessing, model_selection,
             linear_model, ensemble, metrics, svm,
@@ -146,26 +162,24 @@ def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
             'feature_selection': feature_selection,
         })
-        # Replace plt.show with custom implementation
         plt.show = custom_show
-        # Replace plotly figure's show method
         go.Figure.show = custom_plotly_show
-        # Execute code and capture output
         with contextlib.redirect_stdout(stdout):
-            # First execute to get variables
             exec(code, exec_globals)
-            # Capture all variables that were created
             for name, value in exec_globals.items():
                 if not name.startswith('_') and name not in [
                     'np', 'pd', 'plt', 'sns', 'sm', 'stats', 'sklearn',
                     'px', 'go', 'datetime', 'parser', 'pytz', 'holidays',
                     'os', 'sys', 'warnings', 'json', 'DATA_DIR', 'CHARTS_DIR',
                     'datasets', 'preprocessing', 'model_selection', 'linear_model',
-                    'ensemble', 'metrics', 'svm', 'decomposition', 'cluster', 'openpyxl',
-                    'feature_selection', 'df'  # Exclude our parameter from variables
                 ]:
                     variables[name] = value
@@ -177,13 +191,12 @@ def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
             "traceback": traceback.format_exc()
         }
     finally:
-        # Restore original plt.show
         plt.show = original_show
-        # Restore original plotly show
         go.Figure.show = original_plotly_show
-    # Convert various objects to serializable formats
     def convert_objects(obj):
         if isinstance(obj, (np.ndarray, np.generic)):
             return obj.tolist() if obj.size > 1 else obj.item()
@@ -214,34 +227,11 @@ def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
             return f"<function {obj.__name__}>"
         return obj
-    processed_vars = {}
-    for k, v in variables.items():
-        try:
-            processed_vars[k] = convert_objects(v)
-        except Exception as e:
-            processed_vars[k] = f"<Unable to serialize: {str(e)}>"
-    # Check for generated Excel files and include them in the response
-    # In your execute_python_code function, modify the Excel file handling part:
-    excel_files = []
-    for file in data_dir.glob('*.xlsx'):
-      try:
-        with open(file, 'rb') as f:
-            excel_content = base64.b64encode(f.read()).decode('utf-8')
-            excel_files.append({
-                'filename': file.name,
-                'content': excel_content,
-                'content_type': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
-            })
-        # Clean up the file after reading
-        file.unlink()
-      except Exception as e:
-        excel_files.append({
-            'filename': file.name,
-            'error': f"Failed to process Excel file: {str(e)}"
-        })
     return {
         'output': output,
         'error': error,

 import base64
 from pathlib import Path
 import uuid
+import time
 import numpy as np
 import pandas as pd
 import matplotlib
 import plotly.graph_objects as go
 from plotly.io import to_html
 import openpyxl
 def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
     plot_base64 = []
     variables = {}
     html_charts = []
+    excel_files = []
     # Monkey patch plt.show() to save figures
     original_show = plt.show
     def custom_show():
         for i, fig in enumerate(plt.get_fignums()):
             figure = plt.figure(fig)
             buf = io.BytesIO()
             figure.savefig(buf, format='png', bbox_inches='tight')
             buf.seek(0)
     original_plotly_show = go.Figure.show
     def custom_plotly_show(fig, *args, **kwargs):
         chart_id = str(uuid.uuid4())
         filename = f"chart_{chart_id}.html"
         filepath = charts_dir / filename
         html = to_html(fig, include_plotlyjs='cdn')
         with open(filepath, 'w', encoding='utf-8') as f:
             f.write(html)
         html_charts.append(filename)
+        fig._grid_ref = None
         return None
+    # Monkey patch pd.ExcelWriter to capture Excel files
+    original_ExcelWriter = pd.ExcelWriter
+    def custom_ExcelWriter(*args, **kwargs):
+        # Force openpyxl engine if no engine specified
+        if 'engine' not in kwargs:
+            kwargs['engine'] = 'openpyxl'
+        # Create in-memory file
+        excel_buffer = io.BytesIO()
+        kwargs['path'] = excel_buffer
+        writer = original_ExcelWriter(*args, **kwargs)
+        # Add cleanup and capture logic
+        def save():
+            writer.close()
+            excel_buffer.seek(0)
+            excel_content = base64.b64encode(excel_buffer.read()).decode('utf-8')
+            filename = args[0] if len(args) > 0 else kwargs.get('path', 'output.xlsx')
+            if isinstance(filename, Path):
+                filename = filename.name
+            excel_files.append({
+                'filename': filename,
+                'content': excel_content,
+                'content_type': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
+            })
+        writer.save = save
+        return writer
     try:
+        # Patch ExcelWriter before execution
+        pd.ExcelWriter = custom_ExcelWriter
+        # Create execution context
         exec_globals = {
             'np': np,
             'pd': pd,
             'plt': plt,
             'stats': stats,
             'sklearn': sklearn,
             'tabulate': tabulate,
             'px': px,
             'go': go,
             'datetime': datetime,
             'parser': parser,
             'pytz': pytz,
             'os': os,
             'sys': sys,
             'warnings': warnings,
             'json': json,
+            'pd.ExcelWriter': pd.ExcelWriter,
+            'time': time,
+            'openpyxl': openpyxl,
             'DATA_DIR': data_dir,
             'CHARTS_DIR': charts_dir,
             'df': df,
             '__builtins__': __builtins__,
         }
+        # Add sklearn components
         from sklearn import (
             datasets, preprocessing, model_selection,
             linear_model, ensemble, metrics, svm,
             'feature_selection': feature_selection,
         })
+        # Replace show methods
         plt.show = custom_show
         go.Figure.show = custom_plotly_show
+        # Execute code
         with contextlib.redirect_stdout(stdout):
             exec(code, exec_globals)
+            # Capture variables
             for name, value in exec_globals.items():
                 if not name.startswith('_') and name not in [
                     'np', 'pd', 'plt', 'sns', 'sm', 'stats', 'sklearn',
                     'px', 'go', 'datetime', 'parser', 'pytz', 'holidays',
                     'os', 'sys', 'warnings', 'json', 'DATA_DIR', 'CHARTS_DIR',
                     'datasets', 'preprocessing', 'model_selection', 'linear_model',
+                    'ensemble', 'metrics', 'svm', 'decomposition', 'cluster',
+                    'feature_selection', 'df', '__builtins__', 'pd.ExcelWriter',
+                    'time', 'openpyxl'
                 ]:
                     variables[name] = value
             "traceback": traceback.format_exc()
         }
     finally:
+        # Restore original functions
         plt.show = original_show
         go.Figure.show = original_plotly_show
+        pd.ExcelWriter = original_ExcelWriter
+    # Convert variables to serializable formats
     def convert_objects(obj):
         if isinstance(obj, (np.ndarray, np.generic)):
             return obj.tolist() if obj.size > 1 else obj.item()
             return f"<function {obj.__name__}>"
         return obj
+    processed_vars = {
+        k: convert_objects(v)
+        for k, v in variables.items()
+    }
     return {
         'output': output,
         'error': error,