# lambdaofgod's picture
# updated embeddings
# c752e68
from graph_visualizations import *
from text_visualization import WordCloudExtractor
import logging
from functools import partial
import gradio as gr
def display_representations(repos_df, repo, representation1, representation2):
    """Look up a repository's tasks and two of its text representations.

    Args:
        repos_df: DataFrame that is filtered on its "repo_name" and
            "representation" columns and read from its "tasks" and "text"
            columns.
        repo: Value matched against the "repo_name" column.
        representation1: First value matched against "representation".
        representation2: Second value matched against "representation".

    Returns:
        A ``(tasks, text1, text2)`` tuple. ``tasks`` is the "tasks" value of
        the repository's first row, or an empty list when the repository has
        no rows at all. Each text is the "text" value of the first row with
        the requested representation, or ``"No data available"`` when no
        such row exists.
    """
    repo_data = repos_df[repos_df["repo_name"] == repo]
    logging.info("repo_data: %s", repo_data)

    def first_text(representation):
        # Filter once and reuse the result for both the emptiness check and
        # the lookup.
        matches = repo_data.loc[
            repo_data["representation"] == representation, "text"
        ]
        return "No data available" if matches.empty else matches.iloc[0]

    # Guard the tasks lookup the same way the text lookups are guarded, so
    # an unknown repository yields placeholders instead of an IndexError.
    tasks = [] if repo_data.empty else repo_data["tasks"].iloc[0]
    return tasks, first_text(representation1), first_text(representation2)
def get_representation_wordclouds(representations, repos_df):
    """Build one word-cloud image per representation type.

    Args:
        representations: Iterable of values to look for in the
            "representation" column of ``repos_df``.
        repos_df: DataFrame with "representation" and "text" columns.

    Returns:
        Dict mapping each representation to whatever
        ``WordCloudExtractor().extract_wordcloud_image`` returns for the list
        of that representation's "text" values.
    """

    def render(kind):
        # Collect the texts first, then hand them to a fresh extractor.
        matching_rows = repos_df[repos_df["representation"] == kind]
        texts = list(matching_rows["text"])
        return WordCloudExtractor().extract_wordcloud_image(texts)

    return {kind: render(kind) for kind in representations}
def load_embeddings_description():
    """Placeholder: currently loads nothing and returns ``None``."""
    return None
def display_wordclouds(representation_types, repos_df):
    """Create a "Wordclouds" heading followed by a gallery of word clouds.

    One gallery entry is produced per representation type, captioned with
    the representation's name. Presumably meant to be called while a Gradio
    layout context is open -- confirm against the caller.

    Args:
        representation_types: Representation names passed on to
            ``get_representation_wordclouds``.
        repos_df: DataFrame passed on to ``get_representation_wordclouds``.
    """
    clouds_by_type = get_representation_wordclouds(representation_types, repos_df)
    gr.Markdown("# Wordclouds")

    # gr.Gallery is given (image, caption) pairs; the dict is keyed the
    # other way round.
    captioned_images = [
        (image, caption) for caption, image in clouds_by_type.items()
    ]
    gr.Gallery(captioned_images, columns=[3], rows=[4], height=300)
def setup_repository_representations_tab(repos_df, repos, representation_types):
    """Build the tab that shows two representations of one repository side by side.

    Creates a repository dropdown and two representation dropdowns, a
    Markdown area for the repository's tasks, two Markdown panels for the
    selected texts, and finally the word-cloud gallery.

    Args:
        repos_df: DataFrame handed to ``display_representations`` and
            ``display_wordclouds``.
        repos: Sequence of repository names; the first one is preselected.
        representation_types: Choices offered in both representation
            dropdowns.
    """
    default_kind1 = "readme"
    default_kind2 = "code2doc_generated_readme"

    gr.Markdown("# Comparing repository representations")
    gr.Markdown("Select a repository and two representation types to compare them.")

    with gr.Row():
        default_repo = repos[0]
        repo = gr.Dropdown(choices=repos, label="Repository", value=default_repo)
        representation1 = gr.Dropdown(
            choices=representation_types,
            label="Representation 1",
            value=default_kind1,
        )
        representation2 = gr.Dropdown(
            choices=representation_types,
            label="Representation 2",
            value=default_kind2,
        )

    displayed_tasks = gr.Markdown(elem_id="tasks")

    panel_options = dict(variant="panel", scale=1, min_width=300)
    with gr.Row():
        with gr.Column(elem_id="column1", **panel_options):
            text1 = gr.Markdown()
        with gr.Column(elem_id="column2", **panel_options):
            text2 = gr.Markdown()

    def update_representations(selected_repo, kind1, kind2):
        """Return the markdown for the tasks area and both text panels."""
        tasks, first_body, second_body = display_representations(
            repos_df, selected_repo, kind1, kind2
        )
        tasks_md = "## Repository PapersWithCode tasks:\n" + ", ".join(tasks)
        first_md = f"### Representation 1: {kind1}\n\n{first_body}"
        second_md = f"### Representation 2: {kind2}\n\n{second_body}"
        return tasks_md, first_md, second_md

    # Fill the three Markdown areas up front so the tab is not empty before
    # the first dropdown change.
    initial_tasks, initial_first, initial_second = update_representations(
        default_repo, default_kind1, default_kind2
    )
    displayed_tasks.value = initial_tasks
    text1.value = initial_first
    text2.value = initial_second

    # Any of the three dropdowns changing refreshes all three outputs.
    selectors = [repo, representation1, representation2]
    targets = [displayed_tasks, text1, text2]
    for selector in selectors:
        selector.change(fn=update_representations, inputs=selectors, outputs=targets)

    display_wordclouds(representation_types, repos_df)
def setup_tasks_tab(descriptions, task_visualizations):
    """Build the task-count tab: two threshold sliders, two buttons, two plots.

    The plots are only filled by the button clicks wired here; each click
    calls ``task_visualizations.get_tasks_sunburst`` with the matching
    slider's value and ``which_df`` fixed to "all" or "selected".

    Args:
        descriptions: Mapping providing the "task_counts_description"
            markdown shown at the top of the tab.
        task_visualizations: Object exposing ``get_tasks_sunburst``.
    """
    gr.Markdown(descriptions["task_counts_description"])

    with gr.Row():
        min_task_counts_slider_all = gr.Slider(
            minimum=50,
            maximum=1000,
            value=150,
            step=50,
            label="Minimum Task Count (All Repositories)",
        )
        update_button = gr.Button("Update Plots")
        min_task_counts_slider_selected = gr.Slider(
            minimum=10,
            maximum=100,
            value=50,
            step=10,
            label="Minimum Task Count (Selected Repositories)",
        )
        update_selected_button = gr.Button("Update Plots")

    # gr.Row has no label parameter, so the "Task Counts" string that used to
    # be passed positionally here was not a valid argument and is dropped.
    with gr.Row():
        all_repos_tasks_plot = gr.Plot(label="All Repositories")
        selected_repos_tasks_plot = gr.Plot(label="Selected Repositories")

    update_button.click(
        fn=partial(task_visualizations.get_tasks_sunburst, which_df="all"),
        inputs=[min_task_counts_slider_all],
        outputs=[all_repos_tasks_plot],
    )
    update_selected_button.click(
        fn=partial(task_visualizations.get_tasks_sunburst, which_df="selected"),
        inputs=[min_task_counts_slider_selected],
        outputs=[selected_repos_tasks_plot],
    )
def setup_embeddings_tab(descriptions, embedding_visualizer):
    """Build the embeddings tab: a task-area scatterplot plus three plot sections.

    Args:
        descriptions: Mapping with a required "intro" entry and optional
            entries keyed by section name ("Basic representations",
            "Dependency graph based representations", "READMEs"); a section
            description is shown only when its entry is present and truthy.
        embedding_visualizer: Object providing
            ``make_task_area_scatterplot()`` and
            ``make_embedding_plots(color_col=...)``; the latter's result is
            indexed by the three section names.
    """
    section_names = (
        "Basic representations",
        "Dependency graph based representations",
        "READMEs",
    )

    # Components attach to the layout context that is open when they are
    # constructed. Passing already-built components to gr.Column(...) does
    # not group them, so the column is opened first and everything is
    # created inside it.
    with gr.Column():
        gr.Markdown("## Tasks by area")
        gr.Markdown(descriptions["intro"])
        gr.Plot(embedding_visualizer.make_task_area_scatterplot())

        embedding_plots = embedding_visualizer.make_embedding_plots(
            color_col="representation"
        )
        for plot_name in section_names:
            gr.Markdown(f"## {plot_name}")
            section_description = descriptions.get(plot_name)
            if section_description:
                gr.Markdown(section_description)
            gr.Plot(embedding_plots[plot_name])
def setup_graph_tab():
    """Build the dependency-graph tab.

    Loads the graphs with ``init_graphs()``, then creates a repository
    dropdown, a layout dropdown, a fixed pool of edge-type checkboxes, a
    "Visualize Graph" button, a statistics textbox and the plot. The plot is
    redrawn on button click and whenever the repository, layout or any
    checkbox changes.
    """
    # Checkboxes cannot be added after the layout is built, so a fixed pool
    # is created and the unused ones are hidden.
    max_edge_types = 8

    gr.Markdown("# Dependency Graph Visualization")
    gr.Markdown("Select a repository to visualize its dependency graph.")
    graphs_dict = init_graphs()
    repo_names = list(graphs_dict.keys())

    def default_edge_selection(edge_type):
        """Return whether an edge type starts out checked."""
        # function-function should be unchecked by default
        return edge_type != "function-function"

    def edge_types_for(repo_name):
        """Return the edge types of a known repository, else an empty list."""
        if repo_name in graphs_dict:
            return get_available_edge_types(graphs_dict[repo_name])
        return []

    def plot_selected_repo(repo_name, layout_type, *edge_type_checkboxes):
        """Draw the graph using only the edge types whose checkbox is ticked."""
        edge_types = edge_types_for(repo_name)
        # Checkbox i belongs to edge type i; zip stops at the shorter
        # sequence, so surplus (hidden) checkboxes are ignored.
        selected_edge_types = {
            edge_type
            for edge_type, is_selected in zip(edge_types, edge_type_checkboxes)
            if is_selected
        }
        fig, stats = visualize_graph(
            repo_name, graphs_dict, layout_type, selected_edge_types
        )
        return fig, stats

    def update_edge_checkboxes(repo_name):
        """Update edge type checkboxes when repository changes"""
        edge_types = edge_types_for(repo_name)
        shown = [
            gr.Checkbox(
                label=edge_type,
                value=default_edge_selection(edge_type),
                visible=True,
            )
            for edge_type in edge_types[:max_edge_types]
        ]
        # One distinct update object per remaining slot.
        hidden = [
            gr.Checkbox(visible=False)
            for _ in range(max_edge_types - len(shown))
        ]
        return shown + hidden

    # Get initial edge types for the first repository
    initial_edge_types = edge_types_for(repo_names[0]) if repo_names else []

    with gr.Row():
        with gr.Column(scale=1):
            repo_dropdown = gr.Dropdown(
                choices=repo_names,
                label="Select Repository",
                value=repo_names[0] if repo_names else None,
            )
            layout_dropdown = gr.Dropdown(
                choices=[
                    ("Spring Layout (Force-directed)", "spring"),
                    ("Circular Layout", "circular"),
                    ("Kamada-Kawai Layout", "kamada_kawai"),
                    ("Fruchterman-Reingold Layout", "fruchterman_reingold"),
                    ("Shell Layout", "shell"),
                    ("Spectral Layout", "spectral"),
                    ("Planar Layout", "planar"),
                ],
                label="Select Layout",
                value="spring",
            )
            gr.Markdown("### Edge Type Filters")
            gr.Markdown("Select which edge types to display:")

            # Create the checkbox pool. Initial values follow the same
            # default rule that update_edge_checkboxes applies, so the first
            # repository is not treated differently from later selections.
            edge_checkboxes = []
            for i in range(max_edge_types):
                if i < len(initial_edge_types):
                    edge_type = initial_edge_types[i]
                    checkbox = gr.Checkbox(
                        label=edge_type,
                        value=default_edge_selection(edge_type),
                        visible=True,
                    )
                else:
                    checkbox = gr.Checkbox(label=f"Edge Type {i+1}", visible=False)
                edge_checkboxes.append(checkbox)

            visualize_btn = gr.Button("Visualize Graph", variant="primary")
            stats_text = gr.Textbox(
                label="Graph Statistics", lines=6, interactive=False
            )
        with gr.Column(scale=2):
            graph_plot = gr.Plot(label="Interactive Dependency Graph")

    # Set up event handlers
    all_inputs = [repo_dropdown, layout_dropdown] + edge_checkboxes
    plot_outputs = [graph_plot, stats_text]

    visualize_btn.click(
        fn=plot_selected_repo,
        inputs=all_inputs,
        outputs=plot_outputs,
    )

    # On repository change, refresh the checkboxes first and only then
    # redraw, so the plot is built from the new repository's checkbox state
    # rather than from the values left over from the previous one.
    repo_dropdown.change(
        fn=update_edge_checkboxes,
        inputs=[repo_dropdown],
        outputs=edge_checkboxes,
    ).then(
        fn=plot_selected_repo,
        inputs=all_inputs,
        outputs=plot_outputs,
    )

    # Auto-visualize on layout change
    layout_dropdown.change(
        fn=plot_selected_repo,
        inputs=all_inputs,
        outputs=plot_outputs,
    )

    # Auto-visualize on checkbox changes
    for checkbox in edge_checkboxes:
        checkbox.change(
            fn=plot_selected_repo,
            inputs=all_inputs,
            outputs=plot_outputs,
        )