HenryStephen committed on
Commit
dd1a4ad
1 Parent(s): df845d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -66
app.py CHANGED
@@ -28,14 +28,14 @@ device = (
28
  )
29
 
30
  # 1. Product environment
31
- # INDEX_PATH = Path(__file__).parent.joinpath("data/index.bin")
32
- # CLUSTER_PATH = Path(__file__).parent.joinpath("data/repo_clusters.json")
33
  SCIBERT_MODEL_PATH = "allenai/scibert_scivocab_uncased"
34
 
35
 
36
  # 2. Developing environment
37
- INDEX_PATH = Path(__file__).parent.joinpath("data/index_test.bin")
38
- CLUSTER_PATH = Path(__file__).parent.joinpath("data/repo_clusters_test.json")
39
  # SCIBERT_MODEL_PATH = Path(__file__).parent.joinpath("data/scibert_scivocab_uncased") # Download locally
40
 
41
 
@@ -377,66 +377,75 @@ if __name__ == "__main__":
377
  )
378
 
379
  display_columns = st.session_state.display_columns
380
- code_sim_tab, doc_sim_tab, readme_sim_tab, requirement_sim_tab, repo_sim_tab, same_cluster_tab, diff_cluster_tab = st.tabs(
381
  ["Code_sim", "Docstring_sim", "Readme_sim", "Requirement_sim",
382
- "Repository_sim", "Same_cluster", "Different_cluster"])
383
-
384
- if query_doc.code_embedding is not None:
385
- code_sim_res = run_index_search(index, query_doc, "code_embedding", limit)
386
- cluster_numbers = run_cluster_search(repo_clusters, code_sim_res["name"])
387
- code_sim_res["cluster number"] = cluster_numbers
388
- code_sim_tab.dataframe(code_sim_res[display_columns])
389
- else:
390
- code_sim_tab.error("No function code was extracted for this repository!")
391
-
392
- if query_doc.doc_embedding is not None:
393
- doc_sim_res = run_index_search(index, query_doc, "doc_embedding", limit)
394
- cluster_numbers = run_cluster_search(repo_clusters, doc_sim_res["name"])
395
- doc_sim_res["cluster number"] = cluster_numbers
396
- doc_sim_tab.dataframe(doc_sim_res[display_columns])
397
- else:
398
- doc_sim_tab.error("No function docstring was extracted for this repository!")
399
-
400
- if query_doc.readme_embedding is not None:
401
- readme_sim_res = run_index_search(index, query_doc, "readme_embedding", limit)
402
- cluster_numbers = run_cluster_search(repo_clusters, readme_sim_res["name"])
403
- readme_sim_res["cluster number"] = cluster_numbers
404
- readme_sim_tab.dataframe(readme_sim_res[display_columns])
405
- else:
406
- readme_sim_tab.error("No readme file was extracted for this repository!")
407
-
408
- if query_doc.requirement_embedding is not None:
409
- requirement_sim_res = run_index_search(index, query_doc, "requirement_embedding", limit)
410
- cluster_numbers = run_cluster_search(repo_clusters, requirement_sim_res["name"])
411
- requirement_sim_res["cluster number"] = cluster_numbers
412
- requirement_sim_tab.dataframe(requirement_sim_res[display_columns])
413
- else:
414
- requirement_sim_tab.error("No requirement file was extracted for this repository!")
415
-
416
- if query_doc.repository_embedding is not None:
417
- repo_sim_res = run_index_search(index, query_doc, "repository_embedding", limit)
418
- cluster_numbers = run_cluster_search(repo_clusters, repo_sim_res["name"])
419
- repo_sim_res["cluster number"] = cluster_numbers
420
- repo_sim_tab.dataframe(repo_sim_res[display_columns])
421
- else:
422
- repo_sim_tab.error("No such useful information was extracted for this repository!")
423
-
424
- if cluster_number is not None and query_doc.repository_embedding is not None:
425
- same_cluster_df = run_similaritycal_search(index, repo_clusters, sim_cal_model,
426
- query_doc, cluster_number, limit,
427
- same_cluster=True)
428
- diff_cluster_df = run_similaritycal_search(index, repo_clusters, sim_cal_model,
429
- query_doc, cluster_number, limit,
430
- same_cluster=False)
431
- same_cluster_numbers = run_cluster_search(repo_clusters, same_cluster_df["name"])
432
- same_cluster_df["cluster number"] = same_cluster_numbers
433
-
434
- diff_cluster_numbers = run_cluster_search(repo_clusters, diff_cluster_df["name"])
435
- diff_cluster_df["cluster number"] = diff_cluster_numbers
436
-
437
- same_cluster_tab.dataframe(same_cluster_df[display_columns])
438
- diff_cluster_tab.dataframe(diff_cluster_df[display_columns])
439
-
440
- else:
441
- same_cluster_tab.error("No such useful information was extracted for this repository!")
442
- diff_cluster_tab.error("No such useful information was extracted for this repository!")
 
 
 
 
 
 
 
 
 
 
28
  )
29
 
30
  # 1. Product environment
31
+ INDEX_PATH = Path(__file__).parent.joinpath("data/index.bin")
32
+ CLUSTER_PATH = Path(__file__).parent.joinpath("data/repo_clusters.json")
33
  SCIBERT_MODEL_PATH = "allenai/scibert_scivocab_uncased"
34
 
35
 
36
  # 2. Developing environment
37
+ # INDEX_PATH = Path(__file__).parent.joinpath("data/index_test.bin")
38
+ # CLUSTER_PATH = Path(__file__).parent.joinpath("data/repo_clusters_test.json")
39
  # SCIBERT_MODEL_PATH = Path(__file__).parent.joinpath("data/scibert_scivocab_uncased") # Download locally
40
 
41
 
 
377
  )
378
 
379
  display_columns = st.session_state.display_columns
380
+ code_sim_tab, doc_sim_tab, readme_sim_tab, requirement_sim_tab, repo_sim_tab, cluster_tab, same_cluster_tab, = st.tabs(
381
  ["Code_sim", "Docstring_sim", "Readme_sim", "Requirement_sim",
382
+ "Repository_sim", "Cluster_sim", "Same_cluster_sim"])
383
+
384
+ with code_sim_tab:
385
+ if query_doc.code_embedding is not None:
386
+ code_sim_res = run_index_search(index, query_doc, "code_embedding", limit)
387
+ cluster_numbers = run_cluster_search(repo_clusters, code_sim_res["name"])
388
+ code_sim_res["cluster number"] = cluster_numbers
389
+ st.dataframe(code_sim_res[display_columns])
390
+ else:
391
+ st.error("No function code was extracted for this repository!")
392
+
393
+ with doc_sim_tab:
394
+ if query_doc.doc_embedding is not None:
395
+ doc_sim_res = run_index_search(index, query_doc, "doc_embedding", limit)
396
+ cluster_numbers = run_cluster_search(repo_clusters, doc_sim_res["name"])
397
+ doc_sim_res["cluster number"] = cluster_numbers
398
+ st.dataframe(doc_sim_res[display_columns])
399
+ else:
400
+ st.error("No function docstring was extracted for this repository!")
401
+
402
+ with readme_sim_tab:
403
+ if query_doc.readme_embedding is not None:
404
+ readme_sim_res = run_index_search(index, query_doc, "readme_embedding", limit)
405
+ cluster_numbers = run_cluster_search(repo_clusters, readme_sim_res["name"])
406
+ readme_sim_res["cluster number"] = cluster_numbers
407
+ st.dataframe(readme_sim_res[display_columns])
408
+ else:
409
+ st.error("No readme file was extracted for this repository!")
410
+
411
+ with requirement_sim_tab:
412
+ if query_doc.requirement_embedding is not None:
413
+ requirement_sim_res = run_index_search(index, query_doc, "requirement_embedding", limit)
414
+ cluster_numbers = run_cluster_search(repo_clusters, requirement_sim_res["name"])
415
+ requirement_sim_res["cluster number"] = cluster_numbers
416
+ st.dataframe(requirement_sim_res[display_columns])
417
+ else:
418
+ st.error("No requirement file was extracted for this repository!")
419
+
420
+ with repo_sim_tab:
421
+ if query_doc.repository_embedding is not None:
422
+ # Repo Sim tab
423
+ repo_sim_res = run_index_search(index, query_doc, "repository_embedding", limit)
424
+ cluster_numbers = run_cluster_search(repo_clusters, repo_sim_res["name"])
425
+ repo_sim_res["cluster number"] = cluster_numbers
426
+ st.dataframe(repo_sim_res[display_columns])
427
+ else:
428
+ st.error("No such useful information was extracted for this repository!")
429
+
430
+ with cluster_tab:
431
+ if query_doc.repository_embedding is not None:
432
+ cluster_df = run_similaritycal_search(index, repo_clusters, sim_cal_model,
433
+ query_doc, cluster_number, limit,
434
+ same_cluster=False)
435
+ cluster_numbers = run_cluster_search(repo_clusters, cluster_df["name"])
436
+ cluster_df["cluster number"] = cluster_numbers
437
+ st.dataframe(cluster_df[display_columns])
438
+ else:
439
+ st.error("No such useful information was extracted for this repository!")
440
+
441
+ with same_cluster_tab:
442
+ if query_doc.repository_embedding is not None:
443
+ # Cluster tab and same cluster tab
444
+ same_cluster_df = run_similaritycal_search(index, repo_clusters, sim_cal_model,
445
+ query_doc, cluster_number, limit,
446
+ same_cluster=True)
447
+ same_cluster_numbers = run_cluster_search(repo_clusters, same_cluster_df["name"])
448
+ same_cluster_df["cluster number"] = same_cluster_numbers
449
+ same_cluster_tab.dataframe(same_cluster_df[display_columns])
450
+ else:
451
+ same_cluster_tab.error("No such useful information was extracted for this repository!")