|
from graph_visualizations import * |
|
from text_visualization import WordCloudExtractor |
|
import logging |
|
from functools import partial |
|
import gradio as gr |
|
|
|
|
|
def _first_text_for(repo_data, representation):
    """Return the first ``text`` among rows of ``repo_data`` with the given representation.

    Falls back to the literal ``"No data available"`` when no row matches.
    """
    matching_texts = repo_data.loc[
        repo_data["representation"] == representation, "text"
    ]
    if matching_texts.empty:
        return "No data available"
    return matching_texts.iloc[0]


def display_representations(repos_df, repo, representation1, representation2):
    """Look up the tasks and two textual representations of one repository.

    Args:
        repos_df: DataFrame with ``repo_name``, ``tasks``, ``representation``
            and ``text`` columns (these are the columns read here).
        repo: Value matched against the ``repo_name`` column.
        representation1: First value matched against ``representation``.
        representation2: Second value matched against ``representation``.

    Returns:
        A ``(tasks, text1, text2)`` tuple. ``tasks`` is the ``tasks`` cell of
        the first row for ``repo``, or an empty list when the repository has
        no rows. Each text is the first matching ``text`` cell, or
        ``"No data available"`` when that representation is missing.
    """
    repo_data = repos_df[repos_df["repo_name"] == repo]
    # Lazy %-style args: the frame is only rendered if INFO logging is enabled.
    logging.info("repo_data: %s", repo_data)

    # An unknown (or None) repo yields an empty frame; fall back to empty
    # results instead of letting ``.iloc[0]`` raise an opaque IndexError.
    tasks = [] if repo_data.empty else repo_data["tasks"].iloc[0]

    text1 = _first_text_for(repo_data, representation1)
    text2 = _first_text_for(repo_data, representation2)
    return tasks, text1, text2
|
|
|
|
|
def get_representation_wordclouds(representations, repos_df):
    """Map each representation name to the word cloud built from its texts.

    Args:
        representations: Iterable of values matched against the
            ``representation`` column of ``repos_df``.
        repos_df: DataFrame with ``representation`` and ``text`` columns.

    Returns:
        A dict keyed by representation; each value is whatever
        ``WordCloudExtractor().extract_wordcloud_image`` returns for the list
        of texts of that representation.
    """

    def _cloud_for(kind):
        # Collect the texts first, then hand them to a fresh extractor.
        rows_of_kind = repos_df[repos_df["representation"] == kind]
        kind_texts = list(rows_of_kind["text"])
        return WordCloudExtractor().extract_wordcloud_image(kind_texts)

    return {kind: _cloud_for(kind) for kind in representations}
|
|
|
|
|
def load_embeddings_description():
    """Placeholder: takes no arguments, does no work and returns ``None``."""
    return None
|
|
|
|
|
def display_wordclouds(representation_types, repos_df):
    """Create a "Wordclouds" heading followed by a gallery of word clouds.

    One gallery entry is produced per representation type, captioned with the
    representation name. Components are created for their side effect;
    nothing is returned.

    Args:
        representation_types: Representation names to build word clouds for.
        repos_df: DataFrame forwarded to ``get_representation_wordclouds``.
    """
    clouds_by_type = get_representation_wordclouds(representation_types, repos_df)
    # Gallery items are (image, caption) pairs, i.e. the dict items flipped.
    gallery_items = [(image, caption) for caption, image in clouds_by_type.items()]

    gr.Markdown("# Wordclouds")
    gr.Gallery(gallery_items, columns=[3], rows=[4], height=300)
|
|
|
|
|
def setup_repository_representations_tab(repos_df, repos, representation_types):
    """Build the tab that compares two textual representations of a repository.

    Creates three dropdowns (repository and two representation types), a
    markdown area for the repository tasks, two side-by-side markdown panels
    for the selected representations, and finally the word-cloud gallery.

    Args:
        repos_df: DataFrame forwarded to ``display_representations`` and
            ``display_wordclouds``.
        repos: Sequence of repository names offered in the dropdown; the first
            one is preselected. May be empty, in which case nothing is
            preselected and no initial content is rendered.
        representation_types: Choices for both representation dropdowns.
    """
    # Single source of truth for the initial selection, used both for the
    # dropdown values and for the initial render below.
    # ``len(...)`` rather than truthiness so array-like ``repos`` also work.
    default_repo = repos[0] if len(repos) > 0 else None
    default_representation1 = "readme"
    default_representation2 = "code2doc_generated_readme"

    gr.Markdown("# Comparing repository representations")
    gr.Markdown("Select a repository and two representation types to compare them.")
    with gr.Row():
        repo = gr.Dropdown(choices=repos, label="Repository", value=default_repo)
        representation1 = gr.Dropdown(
            choices=representation_types,
            label="Representation 1",
            value=default_representation1,
        )
        representation2 = gr.Dropdown(
            choices=representation_types,
            label="Representation 2",
            value=default_representation2,
        )

    displayed_tasks = gr.Markdown(elem_id="tasks")
    with gr.Row():
        with gr.Column(
            elem_id="column1",
            variant="panel",
            scale=1,
            min_width=300,
        ):
            text1 = gr.Markdown()
        with gr.Column(
            elem_id="column2",
            variant="panel",
            scale=1,
            min_width=300,
        ):
            text2 = gr.Markdown()

    def update_representations(repo, representation1, representation2):
        """Return markdown for the tasks header and both representation panels."""
        tasks, text1_content, text2_content = display_representations(
            repos_df, repo, representation1, representation2
        )
        return (
            "## Repository PapersWithCode tasks:\n" + ", ".join(tasks),
            f"### Representation 1: {representation1}\n\n{text1_content}",
            f"### Representation 2: {representation2}\n\n{text2_content}",
        )

    # Pre-fill the panels for the default selection; skipped when there is no
    # repository to show (previously ``repos[0]`` raised IndexError here).
    if default_repo is not None:
        displayed_tasks.value, text1.value, text2.value = update_representations(
            default_repo, default_representation1, default_representation2
        )

    # Any of the three dropdowns changing refreshes all three outputs.
    for component in (repo, representation1, representation2):
        component.change(
            fn=update_representations,
            inputs=[repo, representation1, representation2],
            outputs=[displayed_tasks, text1, text2],
        )

    # NOTE(review): rendered once, below the comparison panels — confirm this
    # call was not meant to sit inside the loop above.
    display_wordclouds(representation_types, repos_df)
|
|
|
|
|
def setup_tasks_tab(descriptions, task_visualizations):
    """Build the task-count tab: two slider/button pairs driving two plots.

    Args:
        descriptions: Mapping; its ``"task_counts_description"`` entry is
            rendered as markdown at the top of the tab.
        task_visualizations: Object whose ``get_tasks_sunburst`` is called
            with the slider value and ``which_df`` set to ``"all"`` or
            ``"selected"`` to produce each plot.
    """
    gr.Markdown(descriptions["task_counts_description"])

    with gr.Row():
        all_min_count = gr.Slider(
            minimum=50,
            maximum=1000,
            value=150,
            step=50,
            label="Minimum Task Count (All Repositories)",
        )
        all_refresh = gr.Button("Update Plots")
        selected_min_count = gr.Slider(
            minimum=10,
            maximum=100,
            value=50,
            step=10,
            label="Minimum Task Count (Selected Repositories)",
        )
        selected_refresh = gr.Button("Update Plots")

    # NOTE(review): "Task Counts" is passed positionally, exactly as before;
    # gr.Row documents keyword arguments only, so it may have no effect — confirm.
    with gr.Row("Task Counts"):
        all_plot = gr.Plot(label="All Repositories")
        selected_plot = gr.Plot(label="Selected Repositories")

    # Each button redraws its own plot from its own slider.
    wiring = (
        (all_refresh, all_min_count, all_plot, "all"),
        (selected_refresh, selected_min_count, selected_plot, "selected"),
    )
    for button, slider, plot, which in wiring:
        button.click(
            fn=partial(task_visualizations.get_tasks_sunburst, which_df=which),
            inputs=[slider],
            outputs=[plot],
        )
|
|
|
|
|
def setup_embeddings_tab(descriptions, embedding_visualizer):
    """Build the embeddings tab: a task-area scatterplot plus three plot sections.

    Args:
        descriptions: Mapping providing the ``"intro"`` text and, optionally,
            a description keyed by each section name.
        embedding_visualizer: Object providing ``make_task_area_scatterplot()``
            and ``make_embedding_plots(color_col=...)``; the latter's result
            is indexed by section name.
    """
    components = [
        gr.Markdown("## Tasks by area"),
        gr.Markdown(descriptions["intro"]),
        gr.Plot(embedding_visualizer.make_task_area_scatterplot()),
    ]

    plots_by_section = embedding_visualizer.make_embedding_plots(
        color_col="representation"
    )
    section_names = (
        "Basic representations",
        "Dependency graph based representations",
        "READMEs",
    )
    for section in section_names:
        components.append(gr.Markdown(f"## {section}"))
        # A section description is optional; skip it when missing or empty.
        section_text = descriptions.get(section)
        if section_text:
            components.append(gr.Markdown(section_text))
        components.append(gr.Plot(plots_by_section[section]))

    gr.Column(components)
|
|
|
|
|
def setup_graph_tab():
    """Build the dependency-graph tab and wire up its event handlers.

    The tab holds a repository dropdown, a layout dropdown, a fixed pool of
    edge-type checkboxes (shown or hidden depending on the selected
    repository), a "Visualize Graph" button, a statistics textbox and the
    plot. Graphs come from ``init_graphs()``; plotting is delegated to
    ``visualize_graph``.
    """
    # Size of the checkbox pool: it is created once below and then only
    # relabelled / shown / hidden when the repository changes.
    max_edge_type_checkboxes = 8

    gr.Markdown("# Dependency Graph Visualization")
    gr.Markdown("Select a repository to visualize its dependency graph.")
    graphs_dict = init_graphs()
    repo_names = list(graphs_dict.keys())

    def is_checked_by_default(edge_type):
        """Every edge type starts selected except ``"function-function"``."""
        return edge_type != "function-function"

    def plot_selected_repo(repo_name, layout_type, *edge_type_checkboxes):
        """Plot ``repo_name`` using only the edge types whose checkbox is ticked.

        Checkbox states are matched to edge types by position; states beyond
        the repository's available edge types are ignored.
        """
        edge_types = (
            get_available_edge_types(graphs_dict[repo_name])
            if repo_name in graphs_dict
            else []
        )
        selected_edge_types = {
            edge_type
            for edge_type, is_selected in zip(edge_types, edge_type_checkboxes)
            if is_selected
        }

        fig, stats = visualize_graph(
            repo_name, graphs_dict, layout_type, selected_edge_types
        )
        return fig, stats

    def update_edge_checkboxes(repo_name):
        """Update edge type checkboxes when repository changes"""
        if repo_name not in graphs_dict:
            return [gr.Checkbox(visible=False)] * max_edge_type_checkboxes

        edge_types = get_available_edge_types(graphs_dict[repo_name])
        shown = [
            gr.Checkbox(
                label=edge_type,
                value=is_checked_by_default(edge_type),
                visible=True,
            )
            for edge_type in edge_types[:max_edge_type_checkboxes]
        ]
        # Pad with hidden checkboxes so every output slot receives an update.
        hidden = [
            gr.Checkbox(visible=False)
            for _ in range(max_edge_type_checkboxes - len(shown))
        ]
        return shown + hidden

    initial_edge_types = []
    if repo_names:
        initial_edge_types = get_available_edge_types(graphs_dict[repo_names[0]])

    with gr.Row():
        with gr.Column(scale=1):
            repo_dropdown = gr.Dropdown(
                choices=repo_names,
                label="Select Repository",
                value=repo_names[0] if repo_names else None,
            )

            layout_dropdown = gr.Dropdown(
                choices=[
                    ("Spring Layout (Force-directed)", "spring"),
                    ("Circular Layout", "circular"),
                    ("Kamada-Kawai Layout", "kamada_kawai"),
                    ("Fruchterman-Reingold Layout", "fruchterman_reingold"),
                    ("Shell Layout", "shell"),
                    ("Spectral Layout", "spectral"),
                    ("Planar Layout", "planar"),
                ],
                label="Select Layout",
                value="spring",
            )

            gr.Markdown("### Edge Type Filters")
            gr.Markdown("Select which edge types to display:")

            edge_checkboxes = []
            for i in range(max_edge_type_checkboxes):
                if i < len(initial_edge_types):
                    # Same default rule as update_edge_checkboxes, so the
                    # first repository starts in the same state it would have
                    # after being re-selected.
                    checkbox = gr.Checkbox(
                        label=initial_edge_types[i],
                        value=is_checked_by_default(initial_edge_types[i]),
                        visible=True,
                    )
                else:
                    checkbox = gr.Checkbox(label=f"Edge Type {i+1}", visible=False)
                edge_checkboxes.append(checkbox)

            visualize_btn = gr.Button("Visualize Graph", variant="primary")

            stats_text = gr.Textbox(
                label="Graph Statistics", lines=6, interactive=False
            )

        with gr.Column(scale=2):
            graph_plot = gr.Plot(label="Interactive Dependency Graph")

    all_inputs = [repo_dropdown, layout_dropdown] + edge_checkboxes

    visualize_btn.click(
        fn=plot_selected_repo,
        inputs=all_inputs,
        outputs=[graph_plot, stats_text],
    )

    # NOTE(review): the two handlers below are registered on the same change
    # event; the plot handler may receive checkbox states that still belong
    # to the previously selected repository — confirm the intended ordering.
    repo_dropdown.change(
        fn=update_edge_checkboxes,
        inputs=[repo_dropdown],
        outputs=edge_checkboxes,
    )

    repo_dropdown.change(
        fn=plot_selected_repo,
        inputs=all_inputs,
        outputs=[graph_plot, stats_text],
    )

    layout_dropdown.change(
        fn=plot_selected_repo,
        inputs=all_inputs,
        outputs=[graph_plot, stats_text],
    )

    # Toggling any edge-type filter redraws the graph.
    for checkbox in edge_checkboxes:
        checkbox.change(
            fn=plot_selected_repo,
            inputs=all_inputs,
            outputs=[graph_plot, stats_text],
        )
|
|