lambdaofgod committed on
Commit
c752e68
·
1 Parent(s): a9b2499

updated embeddings

Browse files
app.py CHANGED
@@ -50,7 +50,10 @@ task_visualizations = TaskVisualizations(
50
  AppConfig.selected_task_counts_path,
51
  AppConfig.tasks_path,
52
  )
53
- display_df = pd.read_parquet("data/selected_repos_representations_umap2d.parquet")
 
 
 
54
  display_df["is_task"] = display_df["representation"] == "task"
55
  embedding_visualizer = EmbeddingVisualizer(display_df=display_df)
56
 
 
50
  AppConfig.selected_task_counts_path,
51
  AppConfig.tasks_path,
52
  )
53
+ display_df = datasets.load_dataset(
54
+ "lambdaofgod/pwc_github_search",
55
+ data_files="selected_repos_representations_umap2d.parquet",
56
+ )["train"].to_pandas()
57
  display_df["is_task"] = display_df["representation"] == "task"
58
  embedding_visualizer = EmbeddingVisualizer(display_df=display_df)
59
 
data/selected_repos_representations_umap2d.parquet DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:26f519620fb265574be6034ed18419b58fa7d345d17b9dc180a938ef3f37ecc8
3
- size 18983840
 
 
 
 
gradio_tabs.py CHANGED
@@ -35,10 +35,9 @@ def load_embeddings_description():
35
  return
36
 
37
 
38
- def setup_repository_representations_tab(repos_df, repos, representation_types):
39
-
40
  wordcloud_dict = get_representation_wordclouds(representation_types, repos_df)
41
- gr.Markdown("## Wordclouds")
42
  gr.Gallery(
43
  [
44
  (wordcloud, representation_type)
@@ -49,6 +48,10 @@ def setup_repository_representations_tab(repos_df, repos, representation_types):
49
  height=300,
50
  )
51
 
 
 
 
 
52
  gr.Markdown("Select a repository and two representation types to compare them.")
53
  with gr.Row():
54
  repo = gr.Dropdown(choices=repos, label="Repository", value=repos[0])
@@ -100,6 +103,8 @@ def setup_repository_representations_tab(repos_df, repos, representation_types):
100
  outputs=[displayed_tasks, text1, text2],
101
  )
102
 
 
 
103
 
104
  def setup_tasks_tab(descriptions, task_visualizations):
105
 
 
35
  return
36
 
37
 
38
+ def display_wordclouds(representation_types, repos_df):
 
39
  wordcloud_dict = get_representation_wordclouds(representation_types, repos_df)
40
+ gr.Markdown("# Wordclouds")
41
  gr.Gallery(
42
  [
43
  (wordcloud, representation_type)
 
48
  height=300,
49
  )
50
 
51
+
52
+ def setup_repository_representations_tab(repos_df, repos, representation_types):
53
+
54
+ gr.Markdown("# Comparing repository representations")
55
  gr.Markdown("Select a repository and two representation types to compare them.")
56
  with gr.Row():
57
  repo = gr.Dropdown(choices=repos, label="Repository", value=repos[0])
 
103
  outputs=[displayed_tasks, text1, text2],
104
  )
105
 
106
+ display_wordclouds(representation_types, repos_df)
107
+
108
 
109
  def setup_tasks_tab(descriptions, task_visualizations):
110
 
text_visualization.py CHANGED
@@ -84,7 +84,9 @@ class EmbeddingVisualizer(BaseModel):
84
 
85
  display_df = display_df.sort_values("representation", ascending=False)
86
  readme_df = display_df[
87
- display_df["representation"].isin(["readme", "generated_readme", "task"])
 
 
88
  ]
89
  raw_df = display_df[
90
  display_df["representation"].isin(
 
84
 
85
  display_df = display_df.sort_values("representation", ascending=False)
86
  readme_df = display_df[
87
+ display_df["representation"].isin(
88
+ ["readme", "code2doc_generated_readme", "task"]
89
+ )
90
  ]
91
  raw_df = display_df[
92
  display_df["representation"].isin(