File size: 12,485 Bytes
1c66926
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b14a8b
1c66926
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b14a8b
 
 
 
 
 
1c66926
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b14a8b
1c66926
 
 
 
 
 
 
 
 
 
 
 
 
0b14a8b
1c66926
 
0b14a8b
 
1c66926
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b14a8b
1c66926
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b14a8b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
import re
import json
import sys
import contextlib
from io import StringIO
import time
import logging
from src.utils.logger import Logger
import textwrap

logger = Logger(__name__, level="INFO", see_time=False, console_log=False)

@contextlib.contextmanager
def stdoutIO(stdout=None):
    old = sys.stdout
    if stdout is None:
        stdout = StringIO()
    sys.stdout = stdout
    yield stdout
    sys.stdout = old
    
    
def clean_print_statements(code_block):
    """
    This function cleans up any `print()` statements that might contain unwanted `\n` characters.
    It ensures print statements are properly formatted without unnecessary newlines.
    """
    # This regex targets print statements, even if they have newlines inside
    return re.sub(r'print\((.*?)(\\n.*?)(.*?)\)', r'print(\1\3)', code_block, flags=re.DOTALL)

def remove_code_block_from_summary(summary):
    # use regex to remove code block from summary list
    summary = re.sub(r'```python\n(.*?)\n```', '', summary)
    return summary.split("\n")

def remove_main_block(code):
    # Match the __main__ block
    pattern = r'(?m)^if\s+__name__\s*==\s*["\']__main__["\']\s*:\s*\n((?:\s+.*\n?)*)'
    
    match = re.search(pattern, code)
    if match:
        main_block = match.group(1)
        
        # Dedent the code block inside __main__
        dedented_block = textwrap.dedent(main_block)
        
        # Remove \n from any print statements in the block (also handling multiline print cases)
        dedented_block = clean_print_statements(dedented_block)
        # Replace the block in the code
        cleaned_code = re.sub(pattern, dedented_block, code)
        
        # Optional: Remove leading newlines if any
        cleaned_code = cleaned_code.strip()
        
        return cleaned_code
    return code


def format_code_block(code_str):
    code_clean = re.sub(r'^```python\n?', '', code_str, flags=re.MULTILINE)
    code_clean = re.sub(r'\n```$', '', code_clean)
    return f'\n{code_clean}\n'

def format_code_backticked_block(code_str):
    code_clean = re.sub(r'^```python\n?', '', code_str, flags=re.MULTILINE)
    code_clean = re.sub(r'\n```$', '', code_clean)
    # Only match assignments at top level (not indented)
    # 1. Remove 'df = pd.DataFrame()' if it's at the top level
  
  
    # Remove reading the csv file if it's already in the context
    modified_code = re.sub(r"df\s*=\s*pd\.read_csv\([\"\'].*?[\"\']\).*?(\n|$)", '', code_clean)
    
    # Only match assignments at top level (not indented)
    # 1. Remove 'df = pd.DataFrame()' if it's at the top level
    modified_code = re.sub(
        r"^df\s*=\s*pd\.DataFrame\(\s*\)\s*(#.*)?$",
        '',
        modified_code,
        flags=re.MULTILINE
    )

    # # Remove sample dataframe lines with multiple array values
    modified_code = re.sub(r"^# Sample DataFrames?.*?(\n|$)", '', modified_code, flags=re.MULTILINE | re.IGNORECASE)
    
    # # Remove plt.show() statements
    modified_code = re.sub(r"plt\.show\(\).*?(\n|$)", '', modified_code)
    
    
    # remove main
    code_clean = remove_main_block(modified_code)
    
    return f'```python\n{code_clean}\n```'

    
# In format_response.py, modify the execute_code function:
def execute_code_from_markdown(code_str, dataframe=None):
    import pandas as pd
    import plotly.express as px
    import plotly
    import plotly.graph_objects as go
    import matplotlib.pyplot as plt
    import seaborn as sns
    import numpy as np

    context = {
        'pd': pd,
        'px': px,
        'go': go,
        'plt': plt,
        'plotly': plotly,
        '__builtins__': __builtins__,
        '__import__': __import__,
        'sns': sns,
        'np': np,
        'json_outputs': []  # List to store multiple Plotly JSON outputs
    }

    # If a dataframe is provided, add it to the context
    if dataframe is not None:
        context['df'] = dataframe
    
    # Modify code to store multiple JSON outputs
    modified_code = re.sub(
        r'(\w*_?)fig(\w*)\.show\(\)',
        r'json_outputs.append(plotly.io.to_json(\1fig\2, pretty=True))',
        code_str
    )

    modified_code = re.sub(
        r'(\w*_?)fig(\w*)\.to_html\(.*?\)',
        r'json_outputs.append(plotly.io.to_json(\1fig\2, pretty=True))',
        modified_code
    )
    
    # Remove reading the csv file if it's already in the context
    modified_code = re.sub(r"df\s*=\s*pd\.read_csv\([\"\'].*?[\"\']\).*?(\n|$)", '', modified_code)
    
    # Only match assignments at top level (not indented)
    # 1. Remove 'df = pd.DataFrame()' if it's at the top level
    modified_code = re.sub(
        r"^df\s*=\s*pd\.DataFrame\(\s*\)\s*(#.*)?$",
        '',
        modified_code,
        flags=re.MULTILINE
    )



    # # Remove sample dataframe lines with multiple array values
    modified_code = re.sub(r"^# Sample DataFrames?.*?(\n|$)", '', modified_code, flags=re.MULTILINE | re.IGNORECASE)
        
    # # Remove plt.show() statements
    modified_code = re.sub(r"plt\.show\(\).*?(\n|$)", '', modified_code)
    

    # Only add df = pd.read_csv() if no dataframe was provided and the code contains pd.read_csv
    if dataframe is None and 'pd.read_csv' not in modified_code:
        modified_code = re.sub(
            r'import pandas as pd',
            r'import pandas as pd\n\n# Read Housing.csv\ndf = pd.read_csv("Housing.csv")',
            modified_code
        )

    try:
        with stdoutIO() as s:
            exec(modified_code, context)  # Execute the modified code
        output = s.getvalue()
        json_outputs = context.get('json_outputs', [])

        return output, json_outputs
    except Exception as e:
        return "Error executing code: " + str(e), []


def format_response_to_markdown(api_response, agent_name = None, dataframe=None):
    try:
        markdown = []
        logger.log_message(f"API response for {agent_name} at {time.strftime('%Y-%m-%d %H:%M:%S')}: {api_response}", level=logging.INFO)

        if isinstance(api_response, dict):
            for key in api_response:
                if "error" in api_response[key]:
                    return f"**Error**: Rate limit exceeded. Please try switching models from the settings."
                # You can add more checks here if needed for other keys
                       
        # Handle error responses
        if isinstance(api_response, dict) and "error" in api_response:
            return f"**Error**: {api_response['error']}"
        if "response" in api_response and isinstance(api_response['response'], str):
            if any(err in api_response['response'].lower() for err in ["auth", "api", "lm"]):
                return "**Error**: Authentication failed. Please check your API key in settings and try again."
            if "model" in api_response['response'].lower():
                return "**Error**: Model configuration error. Please verify your model selection in settings."

        for agent, content in api_response.items():
            agent = agent.split("__")[0] if "__" in agent else agent
            if "memory" in agent or not content:
                continue
                
            markdown.append(f"\n## {agent.replace('_', ' ').title()}\n")
            
            if agent == "analytical_planner":
                if 'plan_desc' in content:
                    markdown.append(f"### Reasoning\n{content['plan_desc']}\n")
                else:
                    markdown.append(f"### Reasoning\n{content['rationale']}\n")
            else:  
                if "rationale" in content:
                    markdown.append(f"### Reasoning\n{content['rationale']}\n")

            if 'code' in content:
                markdown.append(f"### Code Implementation\n{format_code_backticked_block(content['code'])}\n")
                if agent_name is not None:
                    # execute the code
                    clean_code = format_code_block(content['code'])
                    output, json_outputs = execute_code_from_markdown(clean_code, dataframe)
                    if output:
                        markdown.append("### Execution Output\n")
                        markdown.append(f"```output\n{output}\n```\n")

                    if json_outputs:
                        markdown.append("### Plotly JSON Outputs\n")
                        for idx, json_output in enumerate(json_outputs):
                            if len(json_output) > 1000000:  # If JSON is larger than 1MB
                                logger.log_message(f"Large JSON output detected: {len(json_output)} bytes", level=logging.WARNING)
                            markdown.append(f"```plotly\n{json_output}\n```\n")

            if 'summary' in content:
                # make the summary a bullet-point list
                summary_lines = remove_code_block_from_summary(content['summary'])
                summary_lines = content['summary'].split('\n')
                # remove code block from summary
                markdown.append("### Summary\n")
                for line in summary_lines:
                    if line != "":
                        markdown.append(f"• {line.strip().replace('•', '').replace('-', '').replace('*', '') if line.strip().startswith('•') or line.strip().startswith('-') or line.strip().startswith('*') else line.strip()}\n")

            if 'refined_complete_code' in content and 'summary' in content:
                try:
                    if content['refined_complete_code'] is not None and content['refined_complete_code'] != "":
                        clean_code = format_code_block(content['refined_complete_code']) 
                        markdown_code = format_code_backticked_block(content['refined_complete_code'])
                        output, json_outputs = execute_code_from_markdown(clean_code, dataframe)
                    elif "```python" in content['summary']:
                        clean_code = format_code_block(content['summary'])
                        markdown_code = format_code_backticked_block(content['summary'])
                        output, json_outputs = execute_code_from_markdown(clean_code, dataframe)
                except Exception as e:
                    logger.log_message(f"Error in execute_code_from_markdown: {str(e)}", level=logging.ERROR)
                    markdown_code = f"**Error**: {str(e)}"
                    # continue
                
                if markdown_code is not None:
                    markdown.append(f"### Refined Complete Code\n{markdown_code}\n")
                
                if output:
                    markdown.append("### Execution Output\n")
                    markdown.append(f"```output\n{output}\n```\n")
                    
                if json_outputs:
                    markdown.append("### Plotly JSON Outputs\n")
                    for idx, json_output in enumerate(json_outputs):
                        markdown.append(f"```plotly\n{json_output}\n```\n")
            # if agent_name is not None:  
            #     if f"memory_{agent_name}" in api_response:
            #         markdown.append(f"### Memory\n{api_response[f'memory_{agent_name}']}\n")

    except Exception as e:
        logger.log_message(f"Error in format_response_to_markdown: {str(e)}", level=logging.ERROR)
        return f"{str(e)}"
        
    # logger.log_message(f"Generated markdown content for agent '{agent_name}' at {time.strftime('%Y-%m-%d %H:%M:%S')}: {markdown}, length: {len(markdown)}", level=logging.INFO)
    
    if not markdown or len(markdown) <= 1:
        logger.log_message(f"Generated markdown (ERROR) content for agent '{agent_name}' at {time.strftime('%Y-%m-%d %H:%M:%S')}: {markdown}, length: {len(markdown)}", level=logging.INFO)
        return "Please provide a valid query..."
        
    return '\n'.join(markdown)

# Example usage with dummy data
if __name__ == "__main__":
    sample_response = {
        "code_combiner_agent": {
            "reasoning": "Sample reasoning for multiple charts.",
            "refined_complete_code": """
```python
import plotly.express as px
import pandas as pd

# Sample Data
df = pd.DataFrame({'Category': ['A', 'B', 'C'], 'Values': [10, 20, 30]})

# First Chart
fig = px.bar(df, x='Category', y='Values', title='Bar Chart')
fig.show()

# Second Chart
fig2 = px.pie(df, values='Values', names='Category', title='Pie Chart')
fig2.show()
```
"""
        }
    }

    formatted_md = format_response_to_markdown(sample_response)