import html
import logging
import re

import markdown
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

_logger = logging.getLogger(__name__)

# A list marker at the start of a (stripped) line: "- ", "• ", "* " or "1. ".
_BULLET_RE = re.compile(r'^(?:[-•*] |\d+\.\s)\s*')
_BOLD_RE = re.compile(r'\*\*(.*?)\*\*')
_ITALIC_RE = re.compile(r'\*(.*?)\*')
# A line that is nothing but a "Conclusion" heading, e.g. "## Conclusion",
# "**Conclusion**" or "Conclusion:". The report supplies its own heading.
_CONCLUSION_HEADING_RE = re.compile(
    r'^(?:#{1,6}\s*)?(?:\*\*|__)?\s*Conclusion\s*:?\s*(?:\*\*|__)?\s*:?\s*$',
    re.IGNORECASE,
)
_BOLD_CONCLUSION_RE = re.compile(
    r'<strong>\s*Conclusion\s*</strong>', re.IGNORECASE
)
_EMPTY_PARAGRAPH_RE = re.compile(r'<p>\s*</p>\n?')

# Elements that can execute or embed active content.
_UNSAFE_TAGS = ['script', 'iframe', 'object', 'embed']

# NOTE(review): minimal baseline stylesheet; the wrapper class names used in
# the template below are likewise a baseline - confirm against the intended
# report design.
_REPORT_CSS = """
body {
    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
    line-height: 1.6;
    color: #1f2937;
    background: #f9fafb;
    margin: 0;
}
.container { max-width: 1100px; margin: 0 auto; padding: 2rem; }
h1 { font-size: 2rem; margin-bottom: 1.5rem; }
h2 { font-size: 1.4rem; border-bottom: 1px solid #e5e7eb; padding-bottom: 0.3rem; }
.section { background: #ffffff; border-radius: 8px; padding: 1.5rem; margin-bottom: 1.5rem; }
.visualization-container { margin: 1.5rem 0; }
table { border-collapse: collapse; }
th, td { border: 1px solid #e5e7eb; padding: 0.4rem 0.8rem; }
pre { background: #f3f4f6; padding: 1rem; overflow-x: auto; border-radius: 6px; }
summary { cursor: pointer; font-weight: 600; }
"""


def _markdown_to_html(text):
    """Render markdown *text* to an HTML fragment.

    Returns an empty string for falsy input. Raw HTML in the markdown is
    passed through (it is not escaped first), so script-like elements and
    inline ``on*`` event-handler attributes are removed afterwards. This is
    a best-effort cleanup, not a full sanitizer.
    """
    if not text:
        return ""

    rendered = markdown.markdown(
        str(text), extensions=['tables', 'fenced_code', 'nl2br']
    )
    soup = BeautifulSoup(rendered, 'html.parser')

    # extract() rather than decompose(): it is safe to call on a tag whose
    # ancestor has already been removed from the tree.
    for tag in soup.find_all(_UNSAFE_TAGS):
        tag.extract()
    for tag in soup.find_all(True):
        for attr in [name for name in tag.attrs if name.lower().startswith('on')]:
            del tag[attr]

    return str(soup)


def _format_inline(text):
    """Escape *text* for HTML, then turn ``**bold**`` / ``*italic*`` into tags."""
    # Escape first so the only markup in the result is what we add here.
    escaped = html.escape(text, quote=False)
    escaped = _BOLD_RE.sub(r'<strong>\1</strong>', escaped)
    return _ITALIC_RE.sub(r'<em>\1</em>', escaped)


def _conclusion_to_html(text):
    """Convert the conclusion text to HTML with lenient bullet handling.

    Each non-empty line becomes a ``<p>``; consecutive lines starting with
    ``- ``, ``• ``, ``* `` or ``N. `` are grouped into one ``<ul>``. Lines
    that are only a "Conclusion" heading are dropped, as is a bold
    "Conclusion" label inside a line. Returns an empty string for falsy
    input.
    """
    if not text:
        return ""

    parts = []
    in_list = False

    for raw_line in str(text).strip().split('\n'):
        line = raw_line.strip()

        if _CONCLUSION_HEADING_RE.match(line):
            continue

        # Detect the bullet BEFORE applying emphasis: otherwise the leading
        # "* " of a bullet pairs up with a later "*" and is read as italics.
        bullet = _BULLET_RE.match(line)
        if bullet:
            if not in_list:
                parts.append('<ul>')
                in_list = True
            parts.append(f'<li>{_format_inline(line[bullet.end():])}</li>')
            continue

        if in_list:
            parts.append('</ul>')
            in_list = False
        # Blank lines are kept as empty entries to separate paragraphs.
        parts.append(f'<p>{_format_inline(line)}</p>' if line else '')

    if in_list:
        parts.append('</ul>')

    converted = '\n'.join(parts)
    converted = _BOLD_CONCLUSION_RE.sub('', converted)
    # Removing the label can leave an empty paragraph behind.
    return _EMPTY_PARAGRAPH_RE.sub('', converted)


def _figure_to_html(fig, include_plotlyjs):
    """Render one figure to an HTML fragment, or return None if unsupported.

    *fig* may be any object with a ``to_html`` method or a JSON string that
    ``plotly.io.from_json`` can parse. Anything else yields None.
    """
    if isinstance(fig, str):
        try:
            # Imported lazily: only needed when figures arrive as JSON.
            import plotly.io
            fig = plotly.io.from_json(fig)
        except Exception as exc:  # best-effort: skip figures that fail to parse
            _logger.warning("Could not process figure JSON: %s", exc)
            return None

    if not hasattr(fig, 'to_html'):
        return None

    return fig.to_html(
        full_html=False,
        include_plotlyjs=include_plotlyjs,
        config={'displayModeBar': True},
    )


def _render_figures(figure_groups):
    """Render every figure in *figure_groups* to a list of HTML fragments.

    *figure_groups* may be a single figure, a flat sequence of figures, or
    a sequence whose items are themselves lists/tuples of figures. A figure
    that fails to render is logged and skipped without affecting the rest.
    """
    if not figure_groups:
        return []
    if not isinstance(figure_groups, (list, tuple)):
        figure_groups = [figure_groups]

    rendered = []
    for group in figure_groups:
        figures = group if isinstance(group, (list, tuple)) else [group]
        for fig in figures:
            # Load plotly.js from the CDN once, with the first figure that
            # renders; later figures reuse the already-loaded library.
            plotlyjs = False if rendered else 'cdn'
            try:
                fragment = _figure_to_html(fig, plotlyjs)
            except Exception as exc:  # best-effort: keep the other figures
                _logger.warning("Error processing visualizations: %s", exc)
                continue
            if fragment is not None:
                rendered.append(fragment)
    return rendered


def generate_html_report(return_dict):
    """Generate a clean HTML report focusing on visualizations and key insights.

    Args:
        return_dict: Mapping with the analysis results. The keys read are
            ``goal``, ``deep_questions`` and each item of ``synthesis``
            (markdown text), ``final_conclusion`` (plain text with optional
            bullets), ``plotly_figs`` (figures, figure JSON strings, or
            lists of either) and ``code`` (source code shown verbatim).
            Every key is optional; a missing or empty value leaves its
            section empty.

    Returns:
        The complete report as one HTML document string.
    """
    goal = _markdown_to_html(return_dict.get('goal'))
    questions = _markdown_to_html(return_dict.get('deep_questions'))
    conclusion = _conclusion_to_html(return_dict.get('final_conclusion'))

    synthesis = return_dict.get('synthesis') or []
    if isinstance(synthesis, str):
        # A bare string would otherwise be iterated character by character.
        synthesis = [synthesis]
    synthesis_content = ''.join(
        f'<div class="synthesis-section">{_markdown_to_html(item)}</div>'
        for item in synthesis
    )

    visualizations = _render_figures(return_dict.get('plotly_figs'))
    if visualizations:
        visualization_content = ''.join(
            f'<div class="visualization-container">{viz}</div>'
            for viz in visualizations
        )
    else:
        visualization_content = (
            '<p class="no-visualizations">No visualizations generated</p>'
        )

    code_content = str(return_dict.get('code') or '').strip()
    code_section = ''
    if code_content:
        # Escape the code: characters such as "<" and "&" would otherwise
        # be parsed as markup and corrupt the page.
        code_section = f"""
<section class="section">
<h2>Generated Code</h2>
<details>
<summary>View Generated Code (Click to expand)</summary>
<pre><code class="language-python">{html.escape(code_content)}</code></pre>
</details>
</section>
"""

    report = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Deep Analysis Report</title>
<style>{_REPORT_CSS}</style>
</head>
<body>
<div class="container">
<h1>Deep Analysis Report</h1>

<section class="section">
<h2>Original Question</h2>
<div class="content">{goal}</div>
</section>

<section class="section">
<h2>Detailed Research Questions</h2>
<div class="content">{questions}</div>
</section>

<section class="section">
<h2>Analysis &amp; Insights</h2>
<div class="content">{synthesis_content}</div>
{visualization_content}
</section>
{code_section}
<section class="section">
<h2>Conclusion</h2>
<div class="content">{conclusion}</div>
</section>
</div>
</body>
</html>
"""
    return report