Spaces:

dengkane
/

learn-streamlit

Sleeping

App Files Community

dengkane committed on Aug 30, 2023

Commit

707d585

•

1 Parent(s): 418fa4d

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -34

app.py CHANGED Viewed

@@ -1,50 +1,32 @@
 import streamlit as st
 # To make things easier later, we're also importing numpy and pandas for
 # working with sample data.
-import torch
 from sentence_transformers import SentenceTransformer
-model = SentenceTransformer('moka-ai/m3e-base')
-#Our sentences we like to encode
-sentences = [
-    '* Moka 此文本嵌入模型由 MokaAI 训练并开源，训练脚本使用 uniem',
-    '* Massive 此文本嵌入模型通过**千万级**的中文句对数据集进行训练',
-    '* Mixed 此文本嵌入模型支持中英双语的同质文本相似度计算，异质文本检索等功能，未来还会支持代码检索，ALL in one'
-]
-#Sentences are encoded by calling model.encode()
-embeddings = model.encode(sentences)
-#Print the embeddings
-for sentence, embedding in zip(sentences, embeddings):
-    print("Sentence:", sentence)
-    print("Embedding:", embedding)
-    print("")
 import faiss
-d = embeddings.shape[1]  # Dimension of the embeddings
-index = faiss.IndexFlatIP(d)  # Index that uses inner product (dot product) similarity
-# Add the embeddings to the index
-index.add(embeddings)
-# Search for similar documents
-query = "训练脚本."
-from sklearn.metrics.pairwise import cosine_similarity
 query_embedding = model.encode([query])[0]
-# Compute the cosine similarity between the query embedding and the document embeddings
-similarities = cosine_similarity([query_embedding], embeddings)[0]
-# Get the index of the most similar document
-most_similar_index = similarities.argmax()
 # Print the most similar document
-st.write(documents[most_similar_index])
 st.title('My first app')

 import streamlit as st
 # To make things easier later, we're also importing numpy and pandas for
 # working with sample data.
 from sentence_transformers import SentenceTransformer
 import faiss
+import numpy as np
+# Load the moka-ai/m3e-base model
+model = SentenceTransformer("moka-ai/m3e-base")
+# Encode the documents into embeddings
+documents = ["Document 1", "Document 2", "Document 3"]
+document_embeddings = model.encode(documents)
+# Store the embeddings to FAISS
+index = faiss.IndexFlatIP(document_embeddings.shape[1])
+index.add(document_embeddings)
+# Encode the query into an embedding
+query = "2"
 query_embedding = model.encode([query])[0]
+# Search the FAISS index for the most similar document
+D, I = index.search(np.array([query_embedding]), k=1)
 # Print the most similar document
+print(documents[I[0][0]])
+#======================================================================
 st.title('My first app')