Soumik555 commited on
Commit
fe7ef49
·
1 Parent(s): f50cd2e

added csv-agent-default in next.js

Browse files
Files changed (1) hide show
  1. python_code_interpreter_service.py +55 -65
python_code_interpreter_service.py CHANGED
@@ -6,6 +6,7 @@ import os
6
  import base64
7
  from pathlib import Path
8
  import uuid
 
9
  import numpy as np
10
  import pandas as pd
11
  import matplotlib
@@ -26,7 +27,7 @@ import plotly.express as px
26
  import plotly.graph_objects as go
27
  from plotly.io import to_html
28
  import openpyxl
29
- print(openpyxl.__version__)
30
 
31
 
32
  def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
@@ -53,6 +54,7 @@ def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
53
  plot_base64 = []
54
  variables = {}
55
  html_charts = []
 
56
 
57
  # Monkey patch plt.show() to save figures
58
  original_show = plt.show
@@ -60,7 +62,6 @@ def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
60
  def custom_show():
61
  for i, fig in enumerate(plt.get_fignums()):
62
  figure = plt.figure(fig)
63
- # Save plot to bytes buffer instead of file
64
  buf = io.BytesIO()
65
  figure.savefig(buf, format='png', bbox_inches='tight')
66
  buf.seek(0)
@@ -71,27 +72,51 @@ def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
71
  original_plotly_show = go.Figure.show
72
 
73
  def custom_plotly_show(fig, *args, **kwargs):
74
- # Generate unique filename
75
  chart_id = str(uuid.uuid4())
76
  filename = f"chart_{chart_id}.html"
77
  filepath = charts_dir / filename
78
-
79
- # Save as HTML
80
  html = to_html(fig, include_plotlyjs='cdn')
81
  with open(filepath, 'w', encoding='utf-8') as f:
82
  f.write(html)
83
-
84
- # Add to html_charts list
85
  html_charts.append(filename)
86
-
87
- # Close the figure to free memory
88
- fig._grid_ref = None # Help with memory cleanup
89
  return None
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  try:
92
- # Create a comprehensive execution context with all common data science libraries
 
 
 
93
  exec_globals = {
94
- # Core libraries
95
  'np': np,
96
  'pd': pd,
97
  'plt': plt,
@@ -100,34 +125,25 @@ def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
100
  'stats': stats,
101
  'sklearn': sklearn,
102
  'tabulate': tabulate,
103
- 'openpyxl': openpyxl,
104
-
105
- # Plotly libraries
106
  'px': px,
107
  'go': go,
108
-
109
- # Date/time libraries
110
  'datetime': datetime,
111
  'parser': parser,
112
  'pytz': pytz,
113
-
114
- # Utility
115
  'os': os,
116
  'sys': sys,
117
  'warnings': warnings,
118
  'json': json,
119
-
120
- # File paths
 
121
  'DATA_DIR': data_dir,
122
  'CHARTS_DIR': charts_dir,
123
-
124
- # Provided DataFrame
125
  'df': df,
126
-
127
  '__builtins__': __builtins__,
128
  }
129
 
130
- # Add common sklearn components
131
  from sklearn import (
132
  datasets, preprocessing, model_selection,
133
  linear_model, ensemble, metrics, svm,
@@ -146,26 +162,24 @@ def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
146
  'feature_selection': feature_selection,
147
  })
148
 
149
- # Replace plt.show with custom implementation
150
  plt.show = custom_show
151
-
152
- # Replace plotly figure's show method
153
  go.Figure.show = custom_plotly_show
154
 
155
- # Execute code and capture output
156
  with contextlib.redirect_stdout(stdout):
157
- # First execute to get variables
158
  exec(code, exec_globals)
159
 
160
- # Capture all variables that were created
161
  for name, value in exec_globals.items():
162
  if not name.startswith('_') and name not in [
163
  'np', 'pd', 'plt', 'sns', 'sm', 'stats', 'sklearn',
164
  'px', 'go', 'datetime', 'parser', 'pytz', 'holidays',
165
  'os', 'sys', 'warnings', 'json', 'DATA_DIR', 'CHARTS_DIR',
166
  'datasets', 'preprocessing', 'model_selection', 'linear_model',
167
- 'ensemble', 'metrics', 'svm', 'decomposition', 'cluster', 'openpyxl',
168
- 'feature_selection', 'df' # Exclude our parameter from variables
 
169
  ]:
170
  variables[name] = value
171
 
@@ -177,13 +191,12 @@ def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
177
  "traceback": traceback.format_exc()
178
  }
179
  finally:
180
- # Restore original plt.show
181
  plt.show = original_show
182
- # Restore original plotly show
183
  go.Figure.show = original_plotly_show
 
184
 
185
-
186
- # Convert various objects to serializable formats
187
  def convert_objects(obj):
188
  if isinstance(obj, (np.ndarray, np.generic)):
189
  return obj.tolist() if obj.size > 1 else obj.item()
@@ -214,34 +227,11 @@ def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
214
  return f"<function {obj.__name__}>"
215
  return obj
216
 
217
- processed_vars = {}
218
- for k, v in variables.items():
219
- try:
220
- processed_vars[k] = convert_objects(v)
221
- except Exception as e:
222
- processed_vars[k] = f"<Unable to serialize: {str(e)}>"
223
-
224
- # Check for generated Excel files and include them in the response
225
- # In your execute_python_code function, modify the Excel file handling part:
226
-
227
- excel_files = []
228
- for file in data_dir.glob('*.xlsx'):
229
- try:
230
- with open(file, 'rb') as f:
231
- excel_content = base64.b64encode(f.read()).decode('utf-8')
232
- excel_files.append({
233
- 'filename': file.name,
234
- 'content': excel_content,
235
- 'content_type': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
236
- })
237
- # Clean up the file after reading
238
- file.unlink()
239
- except Exception as e:
240
- excel_files.append({
241
- 'filename': file.name,
242
- 'error': f"Failed to process Excel file: {str(e)}"
243
- })
244
-
245
  return {
246
  'output': output,
247
  'error': error,
 
6
  import base64
7
  from pathlib import Path
8
  import uuid
9
+ import time
10
  import numpy as np
11
  import pandas as pd
12
  import matplotlib
 
27
  import plotly.graph_objects as go
28
  from plotly.io import to_html
29
  import openpyxl
30
+
31
 
32
 
33
  def execute_python_code(code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
 
54
  plot_base64 = []
55
  variables = {}
56
  html_charts = []
57
+ excel_files = []
58
 
59
  # Monkey patch plt.show() to save figures
60
  original_show = plt.show
 
62
  def custom_show():
63
  for i, fig in enumerate(plt.get_fignums()):
64
  figure = plt.figure(fig)
 
65
  buf = io.BytesIO()
66
  figure.savefig(buf, format='png', bbox_inches='tight')
67
  buf.seek(0)
 
72
  original_plotly_show = go.Figure.show
73
 
74
  def custom_plotly_show(fig, *args, **kwargs):
 
75
  chart_id = str(uuid.uuid4())
76
  filename = f"chart_{chart_id}.html"
77
  filepath = charts_dir / filename
 
 
78
  html = to_html(fig, include_plotlyjs='cdn')
79
  with open(filepath, 'w', encoding='utf-8') as f:
80
  f.write(html)
 
 
81
  html_charts.append(filename)
82
+ fig._grid_ref = None
 
 
83
  return None
84
 
85
+ # Monkey patch pd.ExcelWriter to capture Excel files
86
+ original_ExcelWriter = pd.ExcelWriter
87
+
88
+ def custom_ExcelWriter(*args, **kwargs):
89
+ # Force openpyxl engine if no engine specified
90
+ if 'engine' not in kwargs:
91
+ kwargs['engine'] = 'openpyxl'
92
+ # Create in-memory file
93
+ excel_buffer = io.BytesIO()
94
+ kwargs['path'] = excel_buffer
95
+ writer = original_ExcelWriter(*args, **kwargs)
96
+
97
+ # Add cleanup and capture logic
98
+ def save():
99
+ writer.close()
100
+ excel_buffer.seek(0)
101
+ excel_content = base64.b64encode(excel_buffer.read()).decode('utf-8')
102
+ filename = args[0] if len(args) > 0 else kwargs.get('path', 'output.xlsx')
103
+ if isinstance(filename, Path):
104
+ filename = filename.name
105
+ excel_files.append({
106
+ 'filename': filename,
107
+ 'content': excel_content,
108
+ 'content_type': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
109
+ })
110
+
111
+ writer.save = save
112
+ return writer
113
+
114
  try:
115
+ # Patch ExcelWriter before execution
116
+ pd.ExcelWriter = custom_ExcelWriter
117
+
118
+ # Create execution context
119
  exec_globals = {
 
120
  'np': np,
121
  'pd': pd,
122
  'plt': plt,
 
125
  'stats': stats,
126
  'sklearn': sklearn,
127
  'tabulate': tabulate,
 
 
 
128
  'px': px,
129
  'go': go,
 
 
130
  'datetime': datetime,
131
  'parser': parser,
132
  'pytz': pytz,
 
 
133
  'os': os,
134
  'sys': sys,
135
  'warnings': warnings,
136
  'json': json,
137
+ 'pd.ExcelWriter': pd.ExcelWriter,
138
+ 'time': time,
139
+ 'openpyxl': openpyxl,
140
  'DATA_DIR': data_dir,
141
  'CHARTS_DIR': charts_dir,
 
 
142
  'df': df,
 
143
  '__builtins__': __builtins__,
144
  }
145
 
146
+ # Add sklearn components
147
  from sklearn import (
148
  datasets, preprocessing, model_selection,
149
  linear_model, ensemble, metrics, svm,
 
162
  'feature_selection': feature_selection,
163
  })
164
 
165
+ # Replace show methods
166
  plt.show = custom_show
 
 
167
  go.Figure.show = custom_plotly_show
168
 
169
+ # Execute code
170
  with contextlib.redirect_stdout(stdout):
 
171
  exec(code, exec_globals)
172
 
173
+ # Capture variables
174
  for name, value in exec_globals.items():
175
  if not name.startswith('_') and name not in [
176
  'np', 'pd', 'plt', 'sns', 'sm', 'stats', 'sklearn',
177
  'px', 'go', 'datetime', 'parser', 'pytz', 'holidays',
178
  'os', 'sys', 'warnings', 'json', 'DATA_DIR', 'CHARTS_DIR',
179
  'datasets', 'preprocessing', 'model_selection', 'linear_model',
180
+ 'ensemble', 'metrics', 'svm', 'decomposition', 'cluster',
181
+ 'feature_selection', 'df', '__builtins__', 'pd.ExcelWriter',
182
+ 'time', 'openpyxl'
183
  ]:
184
  variables[name] = value
185
 
 
191
  "traceback": traceback.format_exc()
192
  }
193
  finally:
194
+ # Restore original functions
195
  plt.show = original_show
 
196
  go.Figure.show = original_plotly_show
197
+ pd.ExcelWriter = original_ExcelWriter
198
 
199
+ # Convert variables to serializable formats
 
200
  def convert_objects(obj):
201
  if isinstance(obj, (np.ndarray, np.generic)):
202
  return obj.tolist() if obj.size > 1 else obj.item()
 
227
  return f"<function {obj.__name__}>"
228
  return obj
229
 
230
+ processed_vars = {
231
+ k: convert_objects(v)
232
+ for k, v in variables.items()
233
+ }
234
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  return {
236
  'output': output,
237
  'error': error,