Commit
·
c752e68
1
Parent(s):
a9b2499
updated embeddings
Browse files
- app.py +4 -1
- data/selected_repos_representations_umap2d.parquet +0 -3
- gradio_tabs.py +8 -3
- text_visualization.py +3 -1
app.py
CHANGED
@@ -50,7 +50,10 @@ task_visualizations = TaskVisualizations(
|
|
50 |
AppConfig.selected_task_counts_path,
|
51 |
AppConfig.tasks_path,
|
52 |
)
|
53 |
-
display_df =
|
|
|
|
|
|
|
54 |
display_df["is_task"] = display_df["representation"] == "task"
|
55 |
embedding_visualizer = EmbeddingVisualizer(display_df=display_df)
|
56 |
|
|
|
50 |
AppConfig.selected_task_counts_path,
|
51 |
AppConfig.tasks_path,
|
52 |
)
|
53 |
+
display_df = datasets.load_dataset(
|
54 |
+
"lambdaofgod/pwc_github_search",
|
55 |
+
data_files="selected_repos_representations_umap2d.parquet",
|
56 |
+
)["train"].to_pandas()
|
57 |
display_df["is_task"] = display_df["representation"] == "task"
|
58 |
embedding_visualizer = EmbeddingVisualizer(display_df=display_df)
|
59 |
|
data/selected_repos_representations_umap2d.parquet
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:26f519620fb265574be6034ed18419b58fa7d345d17b9dc180a938ef3f37ecc8
|
3 |
-
size 18983840
|
|
|
|
|
|
|
|
gradio_tabs.py
CHANGED
@@ -35,10 +35,9 @@ def load_embeddings_description():
|
|
35 |
return
|
36 |
|
37 |
|
38 |
-
def
|
39 |
-
|
40 |
wordcloud_dict = get_representation_wordclouds(representation_types, repos_df)
|
41 |
-
gr.Markdown("
|
42 |
gr.Gallery(
|
43 |
[
|
44 |
(wordcloud, representation_type)
|
@@ -49,6 +48,10 @@ def setup_repository_representations_tab(repos_df, repos, representation_types):
|
|
49 |
height=300,
|
50 |
)
|
51 |
|
|
|
|
|
|
|
|
|
52 |
gr.Markdown("Select a repository and two representation types to compare them.")
|
53 |
with gr.Row():
|
54 |
repo = gr.Dropdown(choices=repos, label="Repository", value=repos[0])
|
@@ -100,6 +103,8 @@ def setup_repository_representations_tab(repos_df, repos, representation_types):
|
|
100 |
outputs=[displayed_tasks, text1, text2],
|
101 |
)
|
102 |
|
|
|
|
|
103 |
|
104 |
def setup_tasks_tab(descriptions, task_visualizations):
|
105 |
|
|
|
35 |
return
|
36 |
|
37 |
|
38 |
+
def display_wordclouds(representation_types, repos_df):
|
|
|
39 |
wordcloud_dict = get_representation_wordclouds(representation_types, repos_df)
|
40 |
+
gr.Markdown("# Wordclouds")
|
41 |
gr.Gallery(
|
42 |
[
|
43 |
(wordcloud, representation_type)
|
|
|
48 |
height=300,
|
49 |
)
|
50 |
|
51 |
+
|
52 |
+
def setup_repository_representations_tab(repos_df, repos, representation_types):
|
53 |
+
|
54 |
+
gr.Markdown("# Comparing repository representations")
|
55 |
gr.Markdown("Select a repository and two representation types to compare them.")
|
56 |
with gr.Row():
|
57 |
repo = gr.Dropdown(choices=repos, label="Repository", value=repos[0])
|
|
|
103 |
outputs=[displayed_tasks, text1, text2],
|
104 |
)
|
105 |
|
106 |
+
display_wordclouds(representation_types, repos_df)
|
107 |
+
|
108 |
|
109 |
def setup_tasks_tab(descriptions, task_visualizations):
|
110 |
|
text_visualization.py
CHANGED
@@ -84,7 +84,9 @@ class EmbeddingVisualizer(BaseModel):
|
|
84 |
|
85 |
display_df = display_df.sort_values("representation", ascending=False)
|
86 |
readme_df = display_df[
|
87 |
-
display_df["representation"].isin(
|
|
|
|
|
88 |
]
|
89 |
raw_df = display_df[
|
90 |
display_df["representation"].isin(
|
|
|
84 |
|
85 |
display_df = display_df.sort_values("representation", ascending=False)
|
86 |
readme_df = display_df[
|
87 |
+
display_df["representation"].isin(
|
88 |
+
["readme", "code2doc_generated_readme", "task"]
|
89 |
+
)
|
90 |
]
|
91 |
raw_df = display_df[
|
92 |
display_df["representation"].isin(
|