import re
from html import escape

from bs4 import BeautifulSoup
import markdown
import numpy as np
import pandas as pd


def generate_html_report(return_dict):
    """Generate a clean HTML report focusing on visualizations and key insights.

    Args:
        return_dict: Mapping holding the analysis results. Keys read here:
            ``goal``, ``deep_questions`` and ``final_conclusion`` (required,
            markdown-ish text), plus the optional ``synthesis`` (iterable of
            markdown strings), ``plotly_figs`` (figures, figure JSON strings,
            or lists of either) and ``code`` (source text).

    Returns:
        A complete HTML document as a string.

    Raises:
        KeyError: If ``goal``, ``deep_questions`` or ``final_conclusion`` is
            missing from ``return_dict``.
    """
    # NOTE(review): the tag names, CSS classes and stylesheet below are a
    # reconstruction -- the previous revision of this function had lost its
    # markup. Only the visible section labels are known to be original;
    # confirm the layout against the intended design.

    def convert_markdown_to_html(text):
        """Convert markdown text to HTML; empty/None input yields ""."""
        if not text:
            return ""
        # HTML characters are deliberately NOT escaped before conversion, so
        # inline HTML in the markdown source is passed through by the library.
        rendered = markdown.markdown(
            str(text), extensions=['tables', 'fenced_code', 'nl2br']
        )
        # Round-trip through BeautifulSoup to normalise the markup (e.g.
        # unbalanced tags) while preserving its structure.
        return str(BeautifulSoup(rendered, 'html.parser'))

    def convert_conclusion_to_html(text):
        """Convert the conclusion text to HTML with lenient bullet handling.

        Works line by line: bullet lines (``-``, ``•``, ``*``) become ``<ul>``
        items, numbered lines (``1.``) become ``<ol>`` items that keep their
        number, and everything else becomes a paragraph. Lines that consist
        only of a "Conclusion" heading are dropped because the report
        template already supplies that heading.
        """
        if not text:
            return ""

        # Matches a line that is *only* a conclusion heading, e.g.
        # "Conclusion", "**Conclusion:**", "## Conclusion",
        # "<h2>Conclusion</h2>". Anchored on both ends so the word is never
        # removed from ordinary sentences.
        heading_re = re.compile(
            r'^(?:#{1,6}\s*|<h[1-6][^>]*>\s*)?(?:\*\*|__)?\s*conclusion\s*:?'
            r'\s*(?:\*\*|__)?\s*:?\s*(?:</h[1-6]>)?$',
            re.IGNORECASE,
        )
        bullet_re = re.compile(r'^[-•*]\s+(.*)$')
        number_re = re.compile(r'^(\d+)\.\s+(.*)$')

        def inline(fragment):
            """Escape ``fragment`` and apply **bold** / *italic* markup."""
            # Escape first so the only tags in the output are ones we emit.
            safe = escape(fragment, quote=False)
            safe = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', safe)
            # Require non-space characters just inside the asterisks so that
            # expressions such as "2 * 3 * 4" are left untouched.
            return re.sub(
                r'(?<!\*)\*(?!\s)(.+?)(?<!\s)\*(?!\*)', r'<em>\1</em>', safe
            )

        parts = []
        open_list = None  # 'ul', 'ol', or None when no list is open

        def close_list():
            nonlocal open_list
            if open_list:
                parts.append(f'</{open_list}>')
                open_list = None

        for raw_line in str(text).strip().splitlines():
            line = raw_line.strip()
            if not line:
                # A blank line terminates any list in progress.
                close_list()
                continue
            if heading_re.match(line):
                continue

            numbered = number_re.match(line)
            bullet = None if numbered else bullet_re.match(line)
            if numbered or bullet:
                wanted = 'ol' if numbered else 'ul'
                if open_list != wanted:
                    close_list()
                    parts.append(f'<{wanted}>')
                    open_list = wanted
                if numbered:
                    # Keep the author's number even if the list is split.
                    number = int(numbered.group(1))
                    parts.append(
                        f'<li value="{number}">{inline(numbered.group(2))}</li>'
                    )
                else:
                    parts.append(f'<li>{inline(bullet.group(1))}</li>')
            else:
                close_list()
                parts.append(f'<p>{inline(line)}</p>')

        close_list()
        return '\n'.join(parts)

    def render_figure(fig, include_plotlyjs):
        """Return the HTML fragment for one figure, or None if unsupported.

        ``fig`` may be an object exposing ``to_html`` (a Plotly figure) or a
        JSON string describing one.
        """
        if isinstance(fig, str):
            # Imported lazily so plotly is only needed for JSON figures.
            import plotly.io
            fig = plotly.io.from_json(fig)
        elif not hasattr(fig, 'to_html'):
            return None
        return fig.to_html(
            full_html=False,
            include_plotlyjs=include_plotlyjs,
            config={'displayModeBar': True},
        )

    # Convert key text sections to HTML
    goal = convert_markdown_to_html(return_dict['goal'])
    questions = convert_markdown_to_html(return_dict['deep_questions'])
    conclusion = convert_conclusion_to_html(return_dict['final_conclusion'])

    # Combine synthesis content
    synthesis_items = return_dict.get('synthesis') or []
    if isinstance(synthesis_items, str):
        # A bare string would otherwise be iterated character by character.
        synthesis_items = [synthesis_items]
    synthesis_content = ''.join(
        f'<div class="synthesis">{convert_markdown_to_html(item)}</div>\n'
        for item in synthesis_items
    )

    # Generate all visualizations for the synthesis section
    all_visualizations = []
    for fig_group in return_dict.get('plotly_figs') or []:
        figures = fig_group if isinstance(fig_group, (list, tuple)) else [fig_group]
        for fig in figures:
            # plotly.js only needs to be loaded once per page: the first
            # rendered figure pulls it from the CDN, later ones reuse it.
            plotlyjs = False if all_visualizations else 'cdn'
            try:
                fragment = render_figure(fig, plotlyjs)
            except Exception as e:
                # Best effort: a broken figure must not abort the report.
                if isinstance(fig, str):
                    print(f"Warning: Could not process figure JSON: {e}")
                else:
                    print(f"Warning: Error processing visualizations: {e}")
                continue
            if fragment is not None:
                all_visualizations.append(fragment)

    if all_visualizations:
        visualizations_html = ''.join(
            f'<div class="visualization">{viz}</div>\n'
            for viz in all_visualizations
        )
    else:
        visualizations_html = '<p class="empty">No visualizations generated</p>'

    # Prepare code for display; escape it so "<", ">" and "&" in the source
    # cannot break the surrounding markup.
    code_content = str(return_dict.get('code') or '').strip()
    if code_content:
        code_section = (
            '<section>\n'
            '<h2>Generated Code</h2>\n'
            '<details>\n'
            '<summary>View Generated Code (Click to expand)</summary>\n'
            f'<pre><code class="language-python">{escape(code_content)}</code></pre>\n'
            '</details>\n'
            '</section>'
        )
    else:
        code_section = ''

    page_style = """
        body { font-family: -apple-system, "Segoe UI", Roboto, sans-serif;
               line-height: 1.6; color: #222; max-width: 1100px;
               margin: 0 auto; padding: 2rem; }
        h1 { border-bottom: 2px solid #ddd; padding-bottom: 0.5rem; }
        h2 { margin-top: 2rem; }
        table { border-collapse: collapse; }
        th, td { border: 1px solid #ccc; padding: 0.3rem 0.6rem; }
        pre { background: #f6f8fa; padding: 1rem; overflow-x: auto; }
        .visualization { margin: 1.5rem 0; }
        .empty { color: #777; font-style: italic; }
        summary { cursor: pointer; }
    """

    return f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Deep Analysis Report</title>
<style>{page_style}</style>
</head>
<body>
<h1>Deep Analysis Report</h1>
<section>
<h2>Original Question</h2>
{goal}
</section>
<section>
<h2>Detailed Research Questions</h2>
{questions}
</section>
<section>
<h2>Analysis &amp; Insights</h2>
{synthesis_content}
{visualizations_html}
</section>
{code_section}
<section>
<h2>Conclusion</h2>
{conclusion}
</section>
</body>
</html>
"""