Spaces:

myshirk
/

semantic-search-datasets

Runtime error

App Files Files Community

myshirk committed on Apr 18, 2022

Commit

6370188

•

1 Parent(s): 1739c08

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -3

app.py CHANGED Viewed

@@ -3,6 +3,8 @@ from sentence_transformers import SentenceTransformer, util
 from bs4 import BeautifulSoup
 import pandas as pd
 import requests
 def find_abstracts(soup):
   #df = pd.DataFrame(columns = ["identifier", "abstract"])
@@ -40,9 +42,50 @@ def get_metadata():
   return df
-def show_model():
-  return
 def main():
   st.title("Semantic Search for Datasets Using Sentence Transformers")
@@ -77,6 +120,13 @@ def main():
   results = show_model(query)
   st.image("pres-futureplans.png")
 main()

 from bs4 import BeautifulSoup
 import pandas as pd
 import requests
+import os
+import time
 def find_abstracts(soup):
   #df = pd.DataFrame(columns = ["identifier", "abstract"])
   return df
+def show_model(query):
+  """Rank the dataset abstracts by semantic similarity to ``query``.
+
+  The metadata table is read from the local CSV cache when that file exists
+  and was modified within the last day; otherwise it is obtained from
+  get_metadata().
+
+  Args:
+    query: Free-text search string, supplied by the caller.
+
+  Returns:
+    A list of ``(abstract, score, title)`` tuples sorted by decreasing
+    dot-product score. Empty when no usable abstracts are available.
+  """
+  path = "./ncei-metadata.csv"
+  DAY = 86400  # seconds in one day: maximum accepted age of the cached CSV
+  # Reuse the cache only while it exists and is at most a day old.
+  # NOTE(review): presumably get_metadata() also rewrites the CSV -- confirm.
+  if os.path.exists(path) and time.time() - os.path.getmtime(path) <= DAY:
+    df = pd.read_csv(path)
+  else:
+    df = get_metadata()
+  # Make the abstracts the docs. Besides the literal "NA" placeholder, drop
+  # missing values: pd.read_csv parses "NA" cells as NaN by default, and a
+  # NaN cannot be encoded as text.
+  has_abstract = df["abstract"].notna() & (df["abstract"] != "NA")
+  docs_df = df[has_abstract]
+  docs = list(docs_df["abstract"])
+  titles = list(docs_df["title"])
+  # Nothing to rank: skip loading the model and scoring an empty batch.
+  if not docs:
+    return []
+  # The query is the caller's argument; it must not be re-read from stdin
+  # here, which would discard the value passed in by the UI.
+  #Load the model
+  model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
+  #Encode query and documents
+  query_emb = model.encode(query)
+  doc_emb = model.encode(docs)
+  #Compute dot score between query and all document embeddings
+  scores = util.dot_score(query_emb, doc_emb)[0].cpu().tolist()
+  #Combine docs & scores, sorted by decreasing score
+  doc_score_pairs = sorted(
+    zip(docs, scores, titles), key=lambda pair: pair[1], reverse=True
+  )
+  return doc_score_pairs
 def main():
   st.title("Semantic Search for Datasets Using Sentence Transformers")
   results = show_model(query)
+  #Output passages & scores
+  for doc, score, title in results[:10]:
+    print("Score: %f" %score)
+    print("Title: %s" %title)
+    print("Abstract: %s" %doc)
+    print('\n')
   st.image("pres-futureplans.png")
 main()