Spaces:

lambdaofgod
/

github_search_visualizations

Sleeping

App Files Files Community

github_search_visualizations / gradio_tabs.py

lambdaofgod

updated embeddings

c752e68 19 days ago

raw

history blame contribute delete

10.2 kB

	from graph_visualizations import *
	from text_visualization import WordCloudExtractor
	import logging
	from functools import partial
	import gradio as gr


	def display_representations(repos_df, repo, representation1, representation2):
	    """Return the tasks and two representation texts for a single repository.

	    Args:
	        repos_df: DataFrame with "repo_name", "tasks", "representation" and
	            "text" columns (these are the columns indexed below).
	        repo: Value matched against the "repo_name" column.
	        representation1: First value matched against "representation".
	        representation2: Second value matched against "representation".

	    Returns:
	        A ``(tasks, text1, text2)`` tuple. ``tasks`` is the "tasks" cell of
	        the first row for ``repo``; each text is the "text" cell of the first
	        row with the matching representation, or "No data available" when no
	        such row exists. If ``repo`` has no rows at all, ``tasks`` is an empty
	        list and both texts are "No data available".
	    """
	    missing_text = "No data available"
	    repo_data = repos_df[repos_df["repo_name"] == repo]
	    # Lazy %-formatting: the frame is only rendered if INFO logging is enabled.
	    logging.info("repo_data: %s", repo_data)

	    # An unknown repository used to crash on ``.iloc[0]``; degrade gracefully.
	    if repo_data.empty:
	        return [], missing_text, missing_text

	    tasks = repo_data["tasks"].iloc[0]

	    def _first_text(representation):
	        # Filter once per representation instead of once for the emptiness
	        # check and once more for the value.
	        matches = repo_data.loc[repo_data["representation"] == representation, "text"]
	        return missing_text if matches.empty else matches.iloc[0]

	    return tasks, _first_text(representation1), _first_text(representation2)


	def get_representation_wordclouds(representations, repos_df):
	    """Map each representation name to a word-cloud image of its texts.

	    For every entry of ``representations`` the "text" values of the rows in
	    ``repos_df`` whose "representation" column equals that entry are collected
	    and handed to a fresh ``WordCloudExtractor``.

	    Returns:
	        Dict keyed by representation name; values are whatever
	        ``WordCloudExtractor.extract_wordcloud_image`` returns.
	    """

	    def _wordcloud_for(name):
	        # Gather the texts first, then build the extractor, as a plain loop would.
	        matching_rows = repos_df[repos_df["representation"] == name]
	        texts = list(matching_rows["text"])
	        return WordCloudExtractor().extract_wordcloud_image(texts)

	    return {name: _wordcloud_for(name) for name in representations}


	def load_embeddings_description():
	    """Placeholder without an implementation; always returns ``None``."""
	    return None


	def display_wordclouds(representation_types, repos_df):
	    """Create a "Wordclouds" heading followed by a gallery of word clouds.

	    One image per entry of ``representation_types`` is obtained from
	    ``get_representation_wordclouds`` and shown with the representation name
	    as its caption. The components are created but not returned.
	    """
	    images_by_type = get_representation_wordclouds(representation_types, repos_df)
	    gr.Markdown("# Wordclouds")
	    # Gallery items are (image, caption) pairs, so the dict items are flipped.
	    captioned_images = [(image, label) for label, image in images_by_type.items()]
	    gr.Gallery(captioned_images, columns=[3], rows=[4], height=300)


	def setup_repository_representations_tab(repos_df, repos, representation_types):
	    """Lay out the tab that compares two text representations of a repository.

	    Creates a repository dropdown and two representation-type dropdowns, a
	    Markdown area listing the repository's tasks, and two side-by-side
	    Markdown panels holding the selected representation texts. A change of
	    any dropdown re-renders all three Markdown areas. A word-cloud gallery is
	    added at the end via ``display_wordclouds``.

	    Args:
	        repos_df: Table forwarded to ``display_representations`` and
	            ``display_wordclouds``.
	        repos: Repository names offered in the dropdown. The first entry is
	            preselected, so an empty sequence raises ``IndexError``.
	        representation_types: Choices for both representation dropdowns.
	    """
	    first_default = "readme"
	    second_default = "code2doc_generated_readme"
	    panel_options = {"variant": "panel", "scale": 1, "min_width": 300}

	    gr.Markdown("# Comparing repository representations")
	    gr.Markdown("Select a repository and two representation types to compare them.")

	    with gr.Row():
	        repo = gr.Dropdown(choices=repos, label="Repository", value=repos[0])
	        representation1 = gr.Dropdown(
	            choices=representation_types,
	            label="Representation 1",
	            value=first_default,
	        )
	        representation2 = gr.Dropdown(
	            choices=representation_types,
	            label="Representation 2",
	            value=second_default,
	        )

	    displayed_tasks = gr.Markdown(elem_id="tasks")
	    with gr.Row():
	        with gr.Column(elem_id="column1", **panel_options):
	            text1 = gr.Markdown()
	        with gr.Column(elem_id="column2", **panel_options):
	            text2 = gr.Markdown()

	    def _render_comparison(selected_repo, first_type, second_type):
	        """Return the Markdown strings for the tasks area and both panels."""
	        tasks, first_text, second_text = display_representations(
	            repos_df, selected_repo, first_type, second_type
	        )
	        tasks_md = "## Repository PapersWithCode tasks:\n" + ", ".join(tasks)
	        first_md = f"### Representation 1: {first_type}\n\n{first_text}"
	        second_md = f"### Representation 2: {second_type}\n\n{second_text}"
	        return (tasks_md, first_md, second_md)

	    # Pre-fill the Markdown areas so the tab is populated before any change event.
	    displayed_tasks.value, text1.value, text2.value = _render_comparison(
	        repos[0], first_default, second_default
	    )

	    selectors = [repo, representation1, representation2]
	    markdown_targets = [displayed_tasks, text1, text2]
	    for selector in selectors:
	        selector.change(
	            fn=_render_comparison,
	            inputs=selectors,
	            outputs=markdown_targets,
	        )

	    display_wordclouds(representation_types, repos_df)


	def setup_tasks_tab(descriptions, task_visualizations):
	    """Lay out the task-count tab with two independently refreshed plots.

	    Shows ``descriptions["task_counts_description"]`` as Markdown, then a row
	    with a slider and an "Update Plots" button for all repositories and the
	    same pair for selected repositories, then a row with the two plots.
	    Clicking a button calls ``task_visualizations.get_tasks_sunburst`` with the
	    matching slider value and ``which_df`` set to "all" or "selected".

	    Args:
	        descriptions: Mapping that contains "task_counts_description".
	        task_visualizations: Object exposing ``get_tasks_sunburst``.
	    """
	    gr.Markdown(descriptions["task_counts_description"])

	    with gr.Row():
	        all_threshold = gr.Slider(
	            label="Minimum Task Count (All Repositories)",
	            minimum=50,
	            maximum=1000,
	            step=50,
	            value=150,
	        )
	        refresh_all = gr.Button("Update Plots")
	        selected_threshold = gr.Slider(
	            label="Minimum Task Count (Selected Repositories)",
	            minimum=10,
	            maximum=100,
	            step=10,
	            value=50,
	        )
	        refresh_selected = gr.Button("Update Plots")

	    # NOTE(review): "Task Counts" is passed positionally to gr.Row; confirm the
	    # installed Gradio version accepts a positional argument here.
	    with gr.Row("Task Counts"):
	        all_plot = gr.Plot(label="All Repositories")
	        selected_plot = gr.Plot(label="Selected Repositories")

	    def _refresh_on_click(button, threshold, plot, which_df):
	        """Redraw ``plot`` from ``threshold`` whenever ``button`` is clicked."""
	        button.click(
	            fn=partial(task_visualizations.get_tasks_sunburst, which_df=which_df),
	            inputs=[threshold],
	            outputs=[plot],
	        )

	    _refresh_on_click(refresh_all, all_threshold, all_plot, "all")
	    _refresh_on_click(refresh_selected, selected_threshold, selected_plot, "selected")


	def setup_embeddings_tab(descriptions, embedding_visualizer):
	    """Lay out the embeddings tab: a task-area scatterplot plus three plots.

	    Starts with a "Tasks by area" heading, ``descriptions["intro"]`` and the
	    plot from ``embedding_visualizer.make_task_area_scatterplot()``. For each
	    of the three named sections a heading is added, then an optional
	    description (when ``descriptions`` has a truthy entry under the section
	    name), then the plot stored under that name in
	    ``embedding_visualizer.make_embedding_plots(color_col="representation")``.

	    Args:
	        descriptions: Mapping with an "intro" entry and optional per-section
	            entries keyed by section title.
	        embedding_visualizer: Object providing the two plot factories above.
	    """
	    section_titles = (
	        "Basic representations",
	        "Dependency graph based representations",
	        "READMEs",
	    )

	    tab_elems = [
	        gr.Markdown("## Tasks by area"),
	        gr.Markdown(descriptions["intro"]),
	        gr.Plot(embedding_visualizer.make_task_area_scatterplot()),
	    ]

	    embedding_plots = embedding_visualizer.make_embedding_plots(
	        color_col="representation"
	    )
	    for title in section_titles:
	        section = [gr.Markdown(f"## {title}")]
	        blurb = descriptions.get(title)
	        if blurb:
	            section.append(gr.Markdown(blurb))
	        section.append(gr.Plot(embedding_plots[title]))
	        tab_elems.extend(section)

	    # NOTE(review): the component list is passed positionally to gr.Column;
	    # confirm the installed Gradio version accepts that.
	    gr.Column(tab_elems)


	def setup_graph_tab():
	    """Lay out the dependency-graph tab and wire up its event handlers.

	    Loads the graphs with ``init_graphs()`` and creates a repository
	    dropdown, a layout dropdown, a fixed pool of edge-type checkboxes, a
	    "Visualize Graph" button, a statistics textbox and the plot. The plot and
	    statistics are refreshed on button click and on every change of the
	    repository, the layout or any checkbox. Changing the repository also
	    relabels, shows or hides the checkboxes to match that repository's edge
	    types.

	    Every edge type starts checked except "function-function", both for the
	    initially selected repository and after switching repositories.
	    """
	    # Size of the fixed checkbox pool; edge types beyond this many are not
	    # selectable in the UI.
	    max_edge_checkboxes = 8

	    gr.Markdown("# Dependency Graph Visualization")
	    gr.Markdown("Select a repository to visualize its dependency graph.")
	    graphs_dict = init_graphs()
	    repo_names = list(graphs_dict.keys())

	    def _checked_by_default(edge_type):
	        """Single source of truth for the default state of an edge-type checkbox."""
	        # function-function should be unchecked by default
	        return edge_type != "function-function"

	    def plot_selected_repo(repo_name, layout_type, *edge_type_checkboxes):
	        """Draw the graph for ``repo_name`` limited to the ticked edge types."""
	        edge_types = (
	            get_available_edge_types(graphs_dict[repo_name])
	            if repo_name in graphs_dict
	            else []
	        )
	        # Checkbox i corresponds to edge type i; zip drops surplus checkboxes.
	        selected_edge_types = {
	            edge_type
	            for edge_type, is_selected in zip(edge_types, edge_type_checkboxes)
	            if is_selected
	        }

	        fig, stats = visualize_graph(
	            repo_name, graphs_dict, layout_type, selected_edge_types
	        )
	        return fig, stats

	    def update_edge_checkboxes(repo_name):
	        """Update edge type checkboxes when repository changes"""
	        if repo_name not in graphs_dict:
	            return [gr.Checkbox(visible=False)] * max_edge_checkboxes

	        edge_types = get_available_edge_types(graphs_dict[repo_name])
	        checkboxes = []
	        for slot in range(max_edge_checkboxes):
	            if slot < len(edge_types):
	                edge_type = edge_types[slot]
	                checkboxes.append(
	                    gr.Checkbox(
	                        label=edge_type,
	                        value=_checked_by_default(edge_type),
	                        visible=True,
	                    )
	                )
	            else:
	                checkboxes.append(gr.Checkbox(visible=False))

	        return checkboxes

	    # Get initial edge types for the first repository
	    initial_edge_types = []
	    if repo_names:
	        initial_edge_types = get_available_edge_types(graphs_dict[repo_names[0]])

	    with gr.Row():
	        with gr.Column(scale=1):
	            repo_dropdown = gr.Dropdown(
	                choices=repo_names,
	                label="Select Repository",
	                value=repo_names[0] if repo_names else None,
	            )

	            layout_dropdown = gr.Dropdown(
	                choices=[
	                    ("Spring Layout (Force-directed)", "spring"),
	                    ("Circular Layout", "circular"),
	                    ("Kamada-Kawai Layout", "kamada_kawai"),
	                    ("Fruchterman-Reingold Layout", "fruchterman_reingold"),
	                    ("Shell Layout", "shell"),
	                    ("Spectral Layout", "spectral"),
	                    ("Planar Layout", "planar"),
	                ],
	                label="Select Layout",
	                value="spring",
	            )

	            gr.Markdown("### Edge Type Filters")
	            gr.Markdown("Select which edge types to display:")

	            # Create the checkbox pool; the first ones are bound to the initial
	            # repository's edge types and use the same defaults that
	            # update_edge_checkboxes applies after a repository switch.
	            edge_checkboxes = []
	            for slot in range(max_edge_checkboxes):
	                if slot < len(initial_edge_types):
	                    edge_type = initial_edge_types[slot]
	                    checkbox = gr.Checkbox(
	                        label=edge_type,
	                        value=_checked_by_default(edge_type),
	                        visible=True,
	                    )
	                else:
	                    checkbox = gr.Checkbox(
	                        label=f"Edge Type {slot + 1}", visible=False
	                    )
	                edge_checkboxes.append(checkbox)

	            visualize_btn = gr.Button("Visualize Graph", variant="primary")

	            stats_text = gr.Textbox(
	                label="Graph Statistics", lines=6, interactive=False
	            )

	        with gr.Column(scale=2):
	            graph_plot = gr.Plot(label="Interactive Dependency Graph")

	    # Set up event handlers
	    all_inputs = [repo_dropdown, layout_dropdown] + edge_checkboxes
	    plot_outputs = [graph_plot, stats_text]

	    visualize_btn.click(
	        fn=plot_selected_repo,
	        inputs=all_inputs,
	        outputs=plot_outputs,
	    )

	    # Update checkboxes when repository changes
	    repo_dropdown.change(
	        fn=update_edge_checkboxes,
	        inputs=[repo_dropdown],
	        outputs=edge_checkboxes,
	    )

	    # Auto-visualize on dropdown change
	    # NOTE(review): this listener and the checkbox update above are registered
	    # on the same event, so the plot may be drawn from the previous
	    # repository's checkbox states - confirm against Gradio's event ordering.
	    repo_dropdown.change(
	        fn=plot_selected_repo,
	        inputs=all_inputs,
	        outputs=plot_outputs,
	    )

	    # Auto-visualize on layout change
	    layout_dropdown.change(
	        fn=plot_selected_repo,
	        inputs=all_inputs,
	        outputs=plot_outputs,
	    )

	    # Auto-visualize on checkbox changes
	    for checkbox in edge_checkboxes:
	        checkbox.change(
	            fn=plot_selected_repo,
	            inputs=all_inputs,
	            outputs=plot_outputs,
	        )