import gradio as gr import json import numpy as np import plotly.graph_objects as go import plotly.express as px from plotly.subplots import make_subplots import pandas as pd from difflib import SequenceMatcher from visualization.ngram_visualizer import create_ngram_visualization from visualization.topic_visualizer import process_and_visualize_topic_analysis # Added import def create_bow_visualization(analysis_results): """ Create visualizations for bag of words analysis results Args: analysis_results (dict): Analysis results from the bow analysis Returns: list: List of gradio components with visualizations """ # Parse analysis results if it's a string if isinstance(analysis_results, str): try: results = json.loads(analysis_results) except json.JSONDecodeError: return [gr.Markdown("Error parsing analysis results.")] else: results = analysis_results output_components = [] # Check if we have valid results if not results or "analyses" not in results: return [gr.Markdown("No analysis results found.")] # Process each prompt for prompt, analyses in results["analyses"].items(): output_components.append(gr.Markdown(f"## Analysis of Prompt: \"{prompt}\"")) # Process Bag of Words analysis if available if "bag_of_words" in analyses: bow_results = analyses["bag_of_words"] # Show models being compared models = bow_results.get("models", []) if len(models) >= 2: output_components.append(gr.Markdown(f"### Comparing responses from {models[0]} and {models[1]}")) # Get important words for each model important_words = bow_results.get("important_words", {}) # Prepare data for plotting important words if important_words: for model_name, words in important_words.items(): df = pd.DataFrame(words) # Create bar chart for top words fig = px.bar(df, x='word', y='count', title=f"Top Words Used by {model_name}", labels={'word': 'Word', 'count': 'Frequency'}, height=400) # Improve layout fig.update_layout( xaxis_title="Word", yaxis_title="Frequency", xaxis={'categoryorder':'total descending'} ) output_components.append(gr.Plot(value=fig)) # Visualize differential words (words with biggest frequency difference) diff_words = bow_results.get("differential_words", []) word_matrix = bow_results.get("word_count_matrix", {}) if diff_words and word_matrix and len(diff_words) > 0: output_components.append(gr.Markdown("### Words with Biggest Frequency Differences")) # Create dataframe for plotting model1, model2 = models[0], models[1] diff_data = [] for word in diff_words[:15]: # Limit to top 15 for readability if word in word_matrix: counts = word_matrix[word] diff_data.append({ "word": word, model1: counts.get(model1, 0), model2: counts.get(model2, 0) }) if diff_data: diff_df = pd.DataFrame(diff_data) # Create grouped bar chart fig = go.Figure() fig.add_trace(go.Bar( x=diff_df['word'], y=diff_df[model1], name=model1, marker_color='indianred' )) fig.add_trace(go.Bar( x=diff_df['word'], y=diff_df[model2], name=model2, marker_color='lightsalmon' )) fig.update_layout( title="Word Frequency Comparison", xaxis_title="Word", yaxis_title="Frequency", barmode='group', height=500 ) output_components.append(gr.Plot(value=fig)) # If no components were added, show a message if len(output_components) <= 1: output_components.append(gr.Markdown("No detailed Bag of Words analysis found in results.")) return output_components # update the process_and_visualize_analysis function def process_and_visualize_analysis(analysis_results): """ Process the analysis results and create visualization components Args: analysis_results (dict): The analysis results Returns: list: List of gradio components for visualization """ try: print(f"Starting visualization of analysis results: {type(analysis_results)}") components = [] if not analysis_results or "analyses" not in analysis_results: print("Warning: Empty or invalid analysis results") components.append(gr.Markdown("No analysis results to visualize.")) return components # For each prompt in the analysis results for prompt, analyses in analysis_results.get("analyses", {}).items(): print(f"Visualizing results for prompt: {prompt[:30]}...") components.append(gr.Markdown(f"## Analysis for Prompt:\n\"{prompt}\"")) # Check for Bag of Words analysis if "bag_of_words" in analyses: print("Processing Bag of Words visualization") components.append(gr.Markdown("### Bag of Words Analysis")) bow_results = analyses["bag_of_words"] # Display models compared if "models" in bow_results: models = bow_results["models"] components.append(gr.Markdown(f"**Models compared**: {', '.join(models)}")) # Display important words for each model if "important_words" in bow_results: components.append(gr.Markdown("#### Most Common Words by Model")) for model, words in bow_results["important_words"].items(): print(f"Creating word list for model {model}") word_list = [f"{item['word']} ({item['count']})" for item in words[:10]] components.append(gr.Markdown(f"**{model}**: {', '.join(word_list)}")) # Add visualizations for word frequency differences if "differential_words" in bow_results and "word_count_matrix" in bow_results and len( bow_results["models"]) >= 2: diff_words = bow_results["differential_words"] word_matrix = bow_results["word_count_matrix"] models = bow_results["models"] if diff_words and word_matrix and len(diff_words) > 0: components.append(gr.Markdown("### Words with Biggest Frequency Differences")) # Create dataframe for plotting model1, model2 = models[0], models[1] diff_data = [] for word in diff_words[:10]: # Limit to top 10 for readability if word in word_matrix: counts = word_matrix[word] model1_count = counts.get(model1, 0) model2_count = counts.get(model2, 0) # Only include if there's a meaningful difference if abs(model1_count - model2_count) > 0: components.append(gr.Markdown( f"- **{word}**: {model1}: {model1_count}, {model2}: {model2_count}" )) # Check for N-gram analysis if "ngram_analysis" in analyses: print("Processing N-gram visualization") # Use the dedicated n-gram visualization function ngram_components = create_ngram_visualization( {"analyses": {prompt: {"ngram_analysis": analyses["ngram_analysis"]}}}) components.extend(ngram_components) # Check for Topic Modeling analysis if "topic_modeling" in analyses: print("Processing Topic Modeling visualization") # Use the dedicated topic visualization function topic_components = process_and_visualize_topic_analysis( {"analyses": {prompt: {"topic_modeling": analyses["topic_modeling"]}}}) components.extend(topic_components) if not components: components.append(gr.Markdown("No visualization components could be created from the analysis results.")) print(f"Visualization complete: generated {len(components)} components") return components except Exception as e: import traceback error_msg = f"Visualization error: {str(e)}\n{traceback.format_exc()}" print(error_msg) return [gr.Markdown(f"**Error during visualization:**\n\n```\n{error_msg}\n```")]