Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,50 +1,32 @@
|
|
1 |
import streamlit as st
|
2 |
# To make things easier later, we're also importing numpy and pandas for
|
3 |
# working with sample data.
|
4 |
-
import torch
|
5 |
from sentence_transformers import SentenceTransformer
|
6 |
-
|
7 |
-
model = SentenceTransformer('moka-ai/m3e-base')
|
8 |
-
|
9 |
-
#Our sentences we like to encode
|
10 |
-
sentences = [
|
11 |
-
'* Moka 此文本嵌入模型由 MokaAI 训练并开源,训练脚本使用 uniem',
|
12 |
-
'* Massive 此文本嵌入模型通过**千万级**的中文句对数据集进行训练',
|
13 |
-
'* Mixed 此文本嵌入模型支持中英双语的同质文本相似度计算,异质文本检索等功能,未来还会支持代码检索,ALL in one'
|
14 |
-
]
|
15 |
-
|
16 |
-
#Sentences are encoded by calling model.encode()
|
17 |
-
embeddings = model.encode(sentences)
|
18 |
-
|
19 |
-
#Print the embeddings
|
20 |
-
for sentence, embedding in zip(sentences, embeddings):
|
21 |
-
print("Sentence:", sentence)
|
22 |
-
print("Embedding:", embedding)
|
23 |
-
print("")
|
24 |
-
|
25 |
-
|
26 |
import faiss
|
27 |
-
|
28 |
-
index = faiss.IndexFlatIP(d) # Index that uses inner product (dot product) similarity
|
29 |
|
30 |
-
#
|
31 |
-
|
32 |
|
33 |
-
#
|
34 |
-
|
|
|
35 |
|
36 |
-
|
|
|
|
|
37 |
|
|
|
|
|
38 |
query_embedding = model.encode([query])[0]
|
39 |
|
40 |
-
#
|
41 |
-
|
42 |
-
|
43 |
-
# Get the index of the most similar document
|
44 |
-
most_similar_index = similarities.argmax()
|
45 |
|
46 |
# Print the most similar document
|
47 |
-
|
|
|
|
|
48 |
|
49 |
st.title('My first app')
|
50 |
|
|
|
1 |
import streamlit as st
|
2 |
# To make things easier later, we're also importing numpy and pandas for
|
3 |
# working with sample data.
|
|
|
4 |
from sentence_transformers import SentenceTransformer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
import faiss
|
6 |
+
import numpy as np
|
|
|
7 |
|
8 |
+
# Load the moka-ai/m3e-base model
|
9 |
+
model = SentenceTransformer("moka-ai/m3e-base")
|
10 |
|
11 |
+
# Encode the documents into embeddings
|
12 |
+
documents = ["Document 1", "Document 2", "Document 3"]
|
13 |
+
document_embeddings = model.encode(documents)
|
14 |
|
15 |
+
# Store the embeddings to FAISS
|
16 |
+
index = faiss.IndexFlatIP(document_embeddings.shape[1])
|
17 |
+
index.add(document_embeddings)
|
18 |
|
19 |
+
# Encode the query into an embedding
|
20 |
+
query = "2"
|
21 |
query_embedding = model.encode([query])[0]
|
22 |
|
23 |
+
# Search the FAISS index for the most similar document
|
24 |
+
D, I = index.search(np.array([query_embedding]), k=1)
|
|
|
|
|
|
|
25 |
|
26 |
# Print the most similar document
|
27 |
+
print(documents[I[0][0]])
|
28 |
+
|
29 |
+
#======================================================================
|
30 |
|
31 |
st.title('My first app')
|
32 |
|