Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,8 @@ from sentence_transformers import SentenceTransformer, util
|
|
3 |
from bs4 import BeautifulSoup
|
4 |
import pandas as pd
|
5 |
import requests
|
|
|
|
|
6 |
|
7 |
def find_abstracts(soup):
|
8 |
#df = pd.DataFrame(columns = ["identifier", "abstract"])
|
@@ -40,9 +42,50 @@ def get_metadata():
|
|
40 |
|
41 |
return df
|
42 |
|
43 |
-
def show_model():
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
def main():
|
48 |
st.title("Semantic Search for Datasets Using Sentence Transformers")
|
@@ -77,6 +120,13 @@ def main():
|
|
77 |
|
78 |
results = show_model(query)
|
79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
st.image("pres-futureplans.png")
|
81 |
|
82 |
main()
|
|
|
3 |
from bs4 import BeautifulSoup
|
4 |
import pandas as pd
|
5 |
import requests
|
6 |
+
import os
|
7 |
+
import time
|
8 |
|
9 |
def find_abstracts(soup):
|
10 |
#df = pd.DataFrame(columns = ["identifier", "abstract"])
|
|
|
42 |
|
43 |
return df
|
44 |
|
45 |
+
def show_model(query):
    """Rank the dataset abstracts against ``query`` by embedding similarity.

    The metadata table is read from the local CSV cache when that file exists
    and was modified within the last day; otherwise it is rebuilt by calling
    ``get_metadata()``.

    Args:
        query: Search text to embed and compare with every abstract.

    Returns:
        A list of ``(abstract, score, title)`` tuples sorted by decreasing
        dot-product score. Empty when no usable abstract is available.
    """
    path = "./ncei-metadata.csv"
    day_seconds = 86400

    # Reuse the cached CSV only while it is at most one day old.
    if os.path.exists(path) and time.time() - os.path.getmtime(path) <= day_seconds:
        df = pd.read_csv(path)
    else:
        df = get_metadata()

    # Make the abstracts the docs. pandas.read_csv parses the literal "NA" as
    # NaN by default, so rows coming from the cache need the notna() check in
    # addition to the placeholder-string comparison.
    has_abstract = df["abstract"].notna() & (df["abstract"] != "NA")
    docs_df = df[has_abstract]
    docs = list(docs_df["abstract"])
    titles = list(docs_df["title"])

    # Nothing to rank: skip loading the model and encoding an empty batch.
    if not docs:
        return []

    # The query is supplied by the caller. Prompting with input() here would
    # discard that argument and needs an interactive stdin, which a hosted web
    # app presumably does not have.

    # Load the model
    model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')

    # Encode query and documents
    query_emb = model.encode(query)
    doc_emb = model.encode(docs)

    # Compute dot score between query and all document embeddings
    scores = util.dot_score(query_emb, doc_emb)[0].cpu().tolist()

    # Combine docs & scores, then sort by decreasing score
    doc_score_pairs = list(zip(docs, scores, titles))
    return sorted(doc_score_pairs, key=lambda pair: pair[1], reverse=True)
|
89 |
|
90 |
def main():
|
91 |
st.title("Semantic Search for Datasets Using Sentence Transformers")
|
|
|
120 |
|
121 |
results = show_model(query)
|
122 |
|
123 |
+
#Output passages & scores
|
124 |
+
for doc, score, title in results[:10]:
|
125 |
+
print("Score: %f" %score)
|
126 |
+
print("Title: %s" %title)
|
127 |
+
print("Abstract: %s" %doc)
|
128 |
+
print('\n')
|
129 |
+
|
130 |
st.image("pres-futureplans.png")
|
131 |
|
132 |
main()
|