myshirk committed on
Commit
6370188
1 Parent(s): 1739c08

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -3
app.py CHANGED
@@ -3,6 +3,8 @@ from sentence_transformers import SentenceTransformer, util
3
  from bs4 import BeautifulSoup
4
  import pandas as pd
5
  import requests
 
 
6
 
7
  def find_abstracts(soup):
8
  #df = pd.DataFrame(columns = ["identifier", "abstract"])
@@ -40,9 +42,50 @@ def get_metadata():
40
 
41
  return df
42
 
43
- def show_model():
44
-
45
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  def main():
48
  st.title("Semantic Search for Datasets Using Sentence Transformers")
@@ -77,6 +120,13 @@ def main():
77
 
78
  results = show_model(query)
79
 
 
 
 
 
 
 
 
80
  st.image("pres-futureplans.png")
81
 
82
  main()
 
3
  from bs4 import BeautifulSoup
4
  import pandas as pd
5
  import requests
6
+ import os
7
+ import time
8
 
9
  def find_abstracts(soup):
10
  #df = pd.DataFrame(columns = ["identifier", "abstract"])
 
42
 
43
  return df
44
 
45
def show_model(query):
    """Rank dataset abstracts by semantic similarity to ``query``.

    The metadata table is read from ``./ncei-metadata.csv`` when that file
    exists and was modified within the last day; otherwise it is obtained
    from ``get_metadata()``. The query and every usable abstract are embedded
    with the ``multi-qa-MiniLM-L6-cos-v1`` sentence-transformer and each
    abstract is scored against the query with a dot product.

    Args:
        query: Search text supplied by the caller.

    Returns:
        A list of ``(abstract, score, title)`` tuples sorted by descending
        score. The list is empty when the query is blank or when no row has
        a usable abstract.
    """
    # Use the caller's query as given. Prompting on stdin here would discard
    # the argument and block (or raise EOFError) in a non-interactive process.
    if query is None or not str(query).strip():
        return []

    path = "./ncei-metadata.csv"
    day_seconds = 86400

    # The CSV counts as fresh only if it exists and is at most one day old.
    cache_is_fresh = (
        os.path.exists(path)
        and time.time() - os.path.getmtime(path) <= day_seconds
    )
    # NOTE(review): presumably get_metadata() also rewrites the CSV so the
    # next call can hit the cache -- confirm against its definition.
    df = pd.read_csv(path) if cache_is_fresh else get_metadata()

    # Keep only rows with a real abstract. The "NA" placeholder string is
    # filtered as before; NaN is dropped as well because pandas.read_csv
    # parses the literal text "NA" as a missing value by default, so
    # placeholders loaded from the CSV no longer compare equal to "NA".
    abstracts = df["abstract"]
    docs_df = df[abstracts.notna() & (abstracts != "NA")]
    docs = list(docs_df["abstract"])
    titles = list(docs_df["title"])
    if not docs:
        # Nothing to rank; skip the model load and encoding entirely.
        return []

    # Load the model and embed the query and the documents.
    model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
    query_emb = model.encode(query)
    doc_emb = model.encode(docs)

    # dot_score returns one row per query; there is a single query here.
    scores = util.dot_score(query_emb, doc_emb)[0].cpu().tolist()

    # Pair each abstract with its score and title, best match first.
    return sorted(
        zip(docs, scores, titles),
        key=lambda triple: triple[1],
        reverse=True,
    )
89
 
90
  def main():
91
  st.title("Semantic Search for Datasets Using Sentence Transformers")
 
120
 
121
  results = show_model(query)
122
 
123
+ #Output passages & scores
124
+ for doc, score, title in results[:10]:
125
+ print("Score: %f" %score)
126
+ print("Title: %s" %title)
127
+ print("Abstract: %s" %doc)
128
+ print('\n')
129
+
130
  st.image("pres-futureplans.png")
131
 
132
  main()