dengkane committed on
Commit
707d585
1 Parent(s): 418fa4d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -34
app.py CHANGED
@@ -1,50 +1,32 @@
1
  import streamlit as st
2
  # To make things easier later, we're also importing numpy and pandas for
3
  # working with sample data.
4
- import torch
5
  from sentence_transformers import SentenceTransformer
6
-
7
- model = SentenceTransformer('moka-ai/m3e-base')
8
-
9
- #Our sentences we like to encode
10
- sentences = [
11
- '* Moka 此文本嵌入模型由 MokaAI 训练并开源,训练脚本使用 uniem',
12
- '* Massive 此文本嵌入模型通过**千万级**的中文句对数据集进行训练',
13
- '* Mixed 此文本嵌入模型支持中英双语的同质文本相似度计算,异质文本检索等功能,未来还会支持代码检索,ALL in one'
14
- ]
15
-
16
- #Sentences are encoded by calling model.encode()
17
- embeddings = model.encode(sentences)
18
-
19
- #Print the embeddings
20
- for sentence, embedding in zip(sentences, embeddings):
21
- print("Sentence:", sentence)
22
- print("Embedding:", embedding)
23
- print("")
24
-
25
-
26
  import faiss
27
- d = embeddings.shape[1] # Dimension of the embeddings
28
- index = faiss.IndexFlatIP(d) # Index that uses inner product (dot product) similarity
29
 
30
- # Add the embeddings to the index
31
- index.add(embeddings)
32
 
33
- # Search for similar documents
34
- query = "训练脚本."
 
35
 
36
- from sklearn.metrics.pairwise import cosine_similarity
 
 
37
 
 
 
38
  query_embedding = model.encode([query])[0]
39
 
40
- # Compute the cosine similarity between the query embedding and the document embeddings
41
- similarities = cosine_similarity([query_embedding], embeddings)[0]
42
-
43
- # Get the index of the most similar document
44
- most_similar_index = similarities.argmax()
45
 
46
  # Print the most similar document
47
- st.write(documents[most_similar_index])
 
 
48
 
49
  st.title('My first app')
50
 
 
1
  import streamlit as st
2
  # To make things easier later, we're also importing numpy and pandas for
3
  # working with sample data.
 
4
  from sentence_transformers import SentenceTransformer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import faiss
6
+ import numpy as np
 
7
 
8
+ # Load the moka-ai/m3e-base model
9
+ model = SentenceTransformer("moka-ai/m3e-base")
10
 
11
+ # Encode the documents into embeddings
12
+ documents = ["Document 1", "Document 2", "Document 3"]
13
+ document_embeddings = model.encode(documents)
14
 
15
+ # Store the embeddings to FAISS
16
+ index = faiss.IndexFlatIP(document_embeddings.shape[1])
17
+ index.add(document_embeddings)
18
 
19
+ # Encode the query into an embedding
20
+ query = "2"
21
  query_embedding = model.encode([query])[0]
22
 
23
+ # Search the FAISS index for the most similar document
24
+ D, I = index.search(np.array([query_embedding]), k=1)
 
 
 
25
 
26
  # Print the most similar document
27
+ print(documents[I[0][0]])
28
+
29
+ #======================================================================
30
 
31
  st.title('My first app')
32