Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -32,26 +32,19 @@ index.add(embeddings)
|
|
32 |
|
33 |
# Search for similar documents
|
34 |
query = "训练脚本."
|
35 |
-
input_ids = tokenizer.encode(query, return_tensors="pt")
|
36 |
-
with torch.no_grad():
|
37 |
-
query_embedding = model(input_ids)[0][0].numpy()
|
38 |
-
k = 2 # Number of similar documents to retrieve
|
39 |
-
D, I = index.search(query_embedding.reshape(1, -1), k)
|
40 |
|
41 |
-
|
42 |
-
st.write(f"Query: {query}")
|
43 |
-
for i in range(k):
|
44 |
-
st.write(f"Rank {i+1}: {texts[I[0][i]]} (similarity score: {D[0][i]})")
|
45 |
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
#
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
55 |
|
56 |
st.title('My first app')
|
57 |
|
|
|
32 |
|
33 |
# Search for similar documents
|
34 |
query = "训练脚本."
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
|
|
37 |
|
38 |
+
query_embedding = model.encode([query])[0]
|
39 |
+
|
40 |
+
# Compute the cosine similarity between the query embedding and the document embeddings
|
41 |
+
similarities = cosine_similarity([query_embedding], embeddings)[0]
|
42 |
+
|
43 |
+
# Get the index of the most similar document
|
44 |
+
most_similar_index = similarities.argmax()
|
45 |
+
|
46 |
+
# Display the most similar document in the Streamlit app
|
47 |
+
st.write(documents[most_similar_index])
|
48 |
|
49 |
st.title('My first app')
|
50 |
|