ccm committed on
Commit
6a4b3a2
·
verified ·
1 Parent(s): 9cf274d

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +14 -9
main.py CHANGED
@@ -1,17 +1,20 @@
 
 
1
  import gradio
 
 
2
  import numpy
3
  import pandas
4
  import sentence_transformers
5
- import datasets
6
  import faiss
7
 
8
  model = sentence_transformers.SentenceTransformer('allenai-specter')
9
 
10
  full_data = datasets.load_dataset("ccm/publications")['train'].to_pandas()
11
 
12
- substring = "0P9w_S0AAAAJ:yB1At4FlUx8C"
13
- filter = full_data['author_pub_id'].str.contains(substring)
14
- data = full_data[~filter]
15
 
16
  dimensionality = len(data['embedding'][0])
17
  index = faiss.IndexFlatL2(dimensionality)
@@ -22,19 +25,21 @@ index.add(vectors)
22
 
23
  def search(query, k):
24
  query = numpy.expand_dims(model.encode(query), axis=0)
25
- _, I = top_five = index.search(query, k)
26
  top_five = data.loc[I[0]]
27
  search_results = ""
28
 
29
  for i in range(k):
30
- search_results += str(i+1) + ". "
31
- search_results += '"' + top_five["bibtex"].values[i]["title"] + '" '
32
  search_results += top_five["bibtex"].values[i]["citation"]
33
  if top_five["pub_url"].values[i] is not None:
34
- search_results += " [Paper](" + top_five["pub_url"].values[i] + ")"
35
- search_results += "\n"
 
 
36
  return search_results
37
 
 
38
  with gradio.Blocks() as demo:
39
  with gradio.Group():
40
  query = gradio.Textbox(placeholder="Enter search terms...", show_label=False, lines=1, max_lines=1)
 
1
+ import json
2
+
3
  import gradio
4
+ import datasets
5
+
6
  import numpy
7
  import pandas
8
  import sentence_transformers
 
9
  import faiss
10
 
11
  model = sentence_transformers.SentenceTransformer('allenai-specter')
12
 
13
  full_data = datasets.load_dataset("ccm/publications")['train'].to_pandas()
14
 
15
+ filter = ["\"abstract\": null" in json.dumps(bibdict) for bibdict in full_data['bibtex'].values]
16
+ data = full_data[~pandas.Series(filter)]
17
+ data.reset_index(inplace=True)
18
 
19
  dimensionality = len(data['embedding'][0])
20
  index = faiss.IndexFlatL2(dimensionality)
 
25
 
26
  def search(query, k):
27
  query = numpy.expand_dims(model.encode(query), axis=0)
28
+ _, I = index.search(query, k)
29
  top_five = data.loc[I[0]]
30
  search_results = ""
31
 
32
  for i in range(k):
33
+ search_results += '## ' + top_five["bibtex"].values[i]["title"] + '\n'
 
34
  search_results += top_five["bibtex"].values[i]["citation"]
35
  if top_five["pub_url"].values[i] is not None:
36
+ search_results += ", [Paper](" + top_five["pub_url"].values[i] + ")"
37
+ search_results += "\t\n```\n"
38
+ search_results += json.dumps(top_five["bibtex"].values[i], indent=4)
39
+ search_results += "\t\n```\n"
40
  return search_results
41
 
42
+
43
  with gradio.Blocks() as demo:
44
  with gradio.Group():
45
  query = gradio.Textbox(placeholder="Enter search terms...", show_label=False, lines=1, max_lines=1)