"""Gradio tab builders for exploring repository representations and graphs."""

# NOTE: the original import order is preserved on purpose. The star import
# runs first, so the explicit imports below take precedence on a name clash.
from graph_visualizations import *
from text_visualization import WordCloudExtractor
import logging
from functools import partial
import gradio as gr

# Text shown when a repository has no row for the requested representation.
_NO_DATA_PLACEHOLDER = "No data available"

# Number of edge-type checkbox slots pre-allocated in the graph tab. The slots
# are created once and later shown/hidden, so this caps the edge types shown.
_MAX_EDGE_TYPE_CHECKBOXES = 8

# Edge type that starts unchecked whenever checkboxes are (re)built.
_DEFAULT_UNCHECKED_EDGE_TYPE = "function-function"


def _first_text_for(repo_data, representation):
    """Return the first ``text`` value for *representation*, or a placeholder."""
    matching_rows = repo_data[repo_data["representation"] == representation]
    if matching_rows.empty:
        return _NO_DATA_PLACEHOLDER
    return matching_rows["text"].iloc[0]


def display_representations(repos_df, repo, representation1, representation2):
    """Look up the tasks and two representation texts for one repository.

    Args:
        repos_df: DataFrame with at least the columns ``repo_name``,
            ``tasks``, ``representation`` and ``text``.
        repo: Value matched against the ``repo_name`` column.
        representation1: First value matched against ``representation``.
        representation2: Second value matched against ``representation``.

    Returns:
        A ``(tasks, text1, text2)`` tuple. ``tasks`` is taken from the first
        row of the repository; each text falls back to
        ``"No data available"`` when no row has that representation. When the
        repository has no rows at all, ``tasks`` is an empty list and both
        texts are the placeholder.
    """
    repo_data = repos_df[repos_df["repo_name"] == repo]
    logging.info("repo_data: %s", repo_data)

    # An unknown repository used to raise IndexError on ``.iloc[0]``; degrade
    # the same way a missing representation already does.
    if repo_data.empty:
        return [], _NO_DATA_PLACEHOLDER, _NO_DATA_PLACEHOLDER

    tasks = repo_data["tasks"].iloc[0]
    text1 = _first_text_for(repo_data, representation1)
    text2 = _first_text_for(repo_data, representation2)
    return tasks, text1, text2


def get_representation_wordclouds(representations, repos_df):
    """Build one word-cloud image per representation.

    Args:
        representations: Iterable of values from the ``representation`` column.
        repos_df: DataFrame with ``representation`` and ``text`` columns.

    Returns:
        Dict mapping each representation to the result of
        ``WordCloudExtractor().extract_wordcloud_image`` over its texts.
    """
    return {
        representation: WordCloudExtractor().extract_wordcloud_image(
            list(repos_df[repos_df["representation"] == representation]["text"])
        )
        for representation in representations
    }


def load_embeddings_description():
    """Placeholder; currently returns ``None``."""
    return


def display_wordclouds(representation_types, repos_df):
    """Render a heading and a gallery with one word cloud per representation.

    Must be called inside an active Gradio layout context, because the
    components are created for their side effect only.
    """
    wordcloud_dict = get_representation_wordclouds(representation_types, repos_df)
    gr.Markdown("# Wordclouds")
    # Gallery items are (image, caption) pairs.
    gallery_items = [
        (wordcloud, representation_type)
        for representation_type, wordcloud in wordcloud_dict.items()
    ]
    gr.Gallery(
        gallery_items,
        columns=[3],
        rows=[4],
        height=300,
    )


def setup_repository_representations_tab(repos_df, repos, representation_types):
    """Build the tab that compares two representations of a repository.

    Args:
        repos_df: DataFrame passed through to ``display_representations`` and
            ``display_wordclouds``.
        repos: Indexable collection of repository names for the dropdown; the
            first entry is the initial selection (``None`` when empty).
        representation_types: Choices offered in both representation dropdowns.
    """
    # Single source of truth for the defaults used by both the dropdowns and
    # the initial population of the text panels.
    default_repo = repos[0] if len(repos) > 0 else None
    default_representation1 = "readme"
    default_representation2 = "code2doc_generated_readme"

    gr.Markdown("# Comparing repository representations")
    gr.Markdown("Select a repository and two representation types to compare them.")
    with gr.Row():
        repo = gr.Dropdown(choices=repos, label="Repository", value=default_repo)
        representation1 = gr.Dropdown(
            choices=representation_types,
            label="Representation 1",
            value=default_representation1,
        )
        representation2 = gr.Dropdown(
            choices=representation_types,
            label="Representation 2",
            value=default_representation2,
        )

    displayed_tasks = gr.Markdown(elem_id="tasks")
    with gr.Row():
        with gr.Column(
            elem_id="column1",
            variant="panel",
            scale=1,
            min_width=300,
        ):
            text1 = gr.Markdown()
        with gr.Column(
            elem_id="column2",
            variant="panel",
            scale=1,
            min_width=300,
        ):
            text2 = gr.Markdown()

    def update_representations(repo, representation1, representation2):
        """Return the markdown for the tasks header and both text panels."""
        tasks, text1_content, text2_content = display_representations(
            repos_df, repo, representation1, representation2
        )
        return (
            "## Repository PapersWithCode tasks:\n" + ", ".join(tasks),
            f"### Representation 1: {representation1}\n\n{text1_content}",
            f"### Representation 2: {representation2}\n\n{text2_content}",
        )

    # Initial call to populate textboxes with default values
    displayed_tasks.value, text1.value, text2.value = update_representations(
        default_repo, default_representation1, default_representation2
    )

    # Any of the three dropdowns changing refreshes all three outputs.
    for component in [repo, representation1, representation2]:
        component.change(
            fn=update_representations,
            inputs=[repo, representation1, representation2],
            outputs=[displayed_tasks, text1, text2],
        )

    display_wordclouds(representation_types, repos_df)


def setup_tasks_tab(descriptions, task_visualizations):
    """Build the tab with task-count plots for all and selected repositories.

    Args:
        descriptions: Mapping that provides ``"task_counts_description"``.
        task_visualizations: Object exposing
            ``get_tasks_sunburst(<slider value>, which_df=...)``.
    """
    gr.Markdown(descriptions["task_counts_description"])

    with gr.Row():
        min_task_counts_slider_all = gr.Slider(
            minimum=50,
            maximum=1000,
            value=150,
            step=50,
            label="Minimum Task Count (All Repositories)",
        )
        update_button = gr.Button("Update Plots")
        min_task_counts_slider_selected = gr.Slider(
            minimum=10,
            maximum=100,
            value=50,
            step=10,
            label="Minimum Task Count (Selected Repositories)",
        )
        update_selected_button = gr.Button("Update Plots")

    # NOTE(review): a positional argument is passed to gr.Row here; confirm
    # the installed Gradio version accepts it (Row's parameters appear to be
    # keyword-only in recent releases).
    with gr.Row("Task Counts"):
        all_repos_tasks_plot = gr.Plot(label="All Repositories")
        selected_repos_tasks_plot = gr.Plot(label="Selected Repositories")

    # Each button feeds its own slider into the sunburst for one data subset.
    update_button.click(
        fn=partial(task_visualizations.get_tasks_sunburst, which_df="all"),
        inputs=[min_task_counts_slider_all],
        outputs=[all_repos_tasks_plot],
    )
    update_selected_button.click(
        fn=partial(task_visualizations.get_tasks_sunburst, which_df="selected"),
        inputs=[min_task_counts_slider_selected],
        outputs=[selected_repos_tasks_plot],
    )


def setup_embeddings_tab(descriptions, embedding_visualizer):
    """Build the tab with the task-area scatterplot and embedding plots.

    Args:
        descriptions: Mapping that provides ``"intro"`` and, optionally, one
            description per plot name.
        embedding_visualizer: Object exposing ``make_task_area_scatterplot()``
            and ``make_embedding_plots(color_col=...)``; the latter's result
            is indexed by plot name.
    """
    tab_elems = [
        gr.Markdown("## Tasks by area"),
        gr.Markdown(descriptions["intro"]),
        gr.Plot(embedding_visualizer.make_task_area_scatterplot()),
    ]

    embedding_plots = embedding_visualizer.make_embedding_plots(
        color_col="representation"
    )
    plot_names = [
        "Basic representations",
        "Dependency graph based representations",
        "READMEs",
    ]
    for plot_name in plot_names:
        tab_elems.append(gr.Markdown(f"## {plot_name}"))
        # The description is optional; skip missing or empty entries.
        description = descriptions.get(plot_name)
        if description:
            tab_elems.append(gr.Markdown(description))
        tab_elems.append(gr.Plot(embedding_plots[plot_name]))

    # NOTE(review): a positional argument is passed to gr.Column here; confirm
    # the installed Gradio version accepts it (Column's parameters appear to
    # be keyword-only in recent releases).
    gr.Column(tab_elems)


def _edge_type_checked_by_default(edge_type):
    """Return whether *edge_type* starts checked in the edge-type filters."""
    return edge_type != _DEFAULT_UNCHECKED_EDGE_TYPE


def setup_graph_tab():
    """Build the dependency-graph tab.

    Loads the graphs with ``init_graphs()``, then creates a repository
    dropdown, a layout dropdown, a fixed pool of edge-type checkboxes, a
    statistics textbox and the plot, and wires the events that redraw the
    graph.
    """
    gr.Markdown("# Dependency Graph Visualization")
    gr.Markdown("Select a repository to visualize its dependency graph.")

    graphs_dict = init_graphs()
    repo_names = list(graphs_dict.keys())

    def plot_selected_repo(repo_name, layout_type, *edge_type_checkboxes):
        """Draw the graph of *repo_name* restricted to the ticked edge types."""
        # Convert checkbox values to selected edge types
        edge_types = (
            get_available_edge_types(graphs_dict[repo_name])
            if repo_name in graphs_dict
            else []
        )
        # Checkbox i corresponds to edge_types[i]; zip drops surplus
        # (hidden) checkbox slots.
        selected_edge_types = {
            edge_type
            for edge_type, is_selected in zip(edge_types, edge_type_checkboxes)
            if is_selected
        }

        fig, stats = visualize_graph(
            repo_name, graphs_dict, layout_type, selected_edge_types
        )
        return fig, stats

    def update_edge_checkboxes(repo_name):
        """Update edge type checkboxes when repository changes"""
        if repo_name not in graphs_dict:
            return [gr.Checkbox(visible=False)] * _MAX_EDGE_TYPE_CHECKBOXES

        edge_types = get_available_edge_types(graphs_dict[repo_name])
        checkboxes = []
        # One visible checkbox per edge type, hidden ones for unused slots.
        for slot in range(_MAX_EDGE_TYPE_CHECKBOXES):
            if slot < len(edge_types):
                edge_type = edge_types[slot]
                checkboxes.append(
                    gr.Checkbox(
                        label=edge_type,
                        value=_edge_type_checked_by_default(edge_type),
                        visible=True,
                    )
                )
            else:
                checkboxes.append(gr.Checkbox(visible=False))
        return checkboxes

    # Get initial edge types for the first repository
    initial_edge_types = []
    if repo_names:
        initial_edge_types = get_available_edge_types(graphs_dict[repo_names[0]])

    with gr.Row():
        with gr.Column(scale=1):
            repo_dropdown = gr.Dropdown(
                choices=repo_names,
                label="Select Repository",
                value=repo_names[0] if repo_names else None,
            )

            layout_dropdown = gr.Dropdown(
                choices=[
                    ("Spring Layout (Force-directed)", "spring"),
                    ("Circular Layout", "circular"),
                    ("Kamada-Kawai Layout", "kamada_kawai"),
                    ("Fruchterman-Reingold Layout", "fruchterman_reingold"),
                    ("Shell Layout", "shell"),
                    ("Spectral Layout", "spectral"),
                    ("Planar Layout", "planar"),
                ],
                label="Select Layout",
                value="spring",
            )

            gr.Markdown("### Edge Type Filters")
            gr.Markdown("Select which edge types to display:")

            # Create checkboxes for edge types with initial values. The
            # defaults follow the same rule as update_edge_checkboxes, so the
            # first repository starts in the state it would have after being
            # re-selected.
            edge_checkboxes = []
            for slot in range(_MAX_EDGE_TYPE_CHECKBOXES):
                if slot < len(initial_edge_types):
                    edge_type = initial_edge_types[slot]
                    checkbox = gr.Checkbox(
                        label=edge_type,
                        value=_edge_type_checked_by_default(edge_type),
                        visible=True,
                    )
                else:
                    checkbox = gr.Checkbox(
                        label=f"Edge Type {slot+1}", visible=False
                    )
                edge_checkboxes.append(checkbox)

            visualize_btn = gr.Button("Visualize Graph", variant="primary")

            stats_text = gr.Textbox(
                label="Graph Statistics", lines=6, interactive=False
            )

        with gr.Column(scale=2):
            graph_plot = gr.Plot(label="Interactive Dependency Graph")

    # Set up event handlers
    all_inputs = [repo_dropdown, layout_dropdown] + edge_checkboxes
    plot_outputs = [graph_plot, stats_text]

    visualize_btn.click(
        fn=plot_selected_repo,
        inputs=all_inputs,
        outputs=plot_outputs,
    )

    # Update checkboxes when repository changes
    repo_dropdown.change(
        fn=update_edge_checkboxes,
        inputs=[repo_dropdown],
        outputs=edge_checkboxes,
    )

    # Auto-visualize on dropdown change
    # NOTE(review): this listener and the checkbox update above are registered
    # independently on the same event, so the plot may be drawn with the
    # previous repository's checkbox values — confirm ordering, or chain them.
    repo_dropdown.change(
        fn=plot_selected_repo,
        inputs=all_inputs,
        outputs=plot_outputs,
    )

    # Auto-visualize on layout change
    layout_dropdown.change(
        fn=plot_selected_repo,
        inputs=all_inputs,
        outputs=plot_outputs,
    )

    # Auto-visualize on checkbox changes
    for checkbox in edge_checkboxes:
        checkbox.change(
            fn=plot_selected_repo,
            inputs=all_inputs,
            outputs=plot_outputs,
        )