import streamlit as st
# To make things easier later, we're also importing numpy and pandas for
# working with sample data.
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('moka-ai/m3e-base')
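# m3e-base is MokaAI's bilingual (Chinese/English) text-embedding model; the
# weights are downloaded from the Hugging Face Hub on first use.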

# The sentences we'd like to encode (Chinese descriptions of the m3e model)
sentences = [
    '* Moka 此文本嵌入模型由 MokaAI 训练并开源,训练脚本使用 uniem',
    '* Massive 此文本嵌入模型通过**千万级**的中文句对数据集进行训练',
    '* Mixed 此文本嵌入模型支持中英双语的同质文本相似度计算,异质文本检索等功能,未来还会支持代码检索,ALL in one'
]

# Sentences are encoded by calling model.encode()
embeddings = model.encode(sentences)
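# model.encode returns a NumPy float32 array of shape (len(sentences), embedding_dim)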

# Print the embeddings
for sentence, embedding in zip(sentences, embeddings):
    print("Sentence:", sentence)
    print("Embedding:", embedding)
    print("")


import faiss
d = embeddings.shape[1]  # Dimension of the embeddings
index = faiss.IndexFlatIP(d)  # Index that uses inner product (dot product) similarity
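# Note: IndexFlatIP ranks by raw dot product; for cosine similarity, L2-normalize
# the embeddings first (faiss.normalize_L2) and do the same for query embeddings.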

# Add the embeddings to the index
index.add(embeddings)

# Search for similar documents
query = "训练脚本."
input_ids = tokenizer.encode(query, return_tensors="pt")
with torch.no_grad():
    query_embedding = model(input_ids)[0][0].numpy()
k = 2  # Number of similar documents to retrieve
D, I = index.search(query_embedding.reshape(1, -1), k)

# Print the results
st.write(f"Query: {query}")
for i in range(k):
    st.write(f"Rank {i+1}: {texts[I[0][i]]} (similarity score: {D[0][i]})")

    
# Search the index again for the most similar content, reusing the query embedding
k = min(5, index.ntotal)  # Number of results to retrieve, capped at the index size
D, I = index.search(query_embedding, k)

# Display the results
st.write("Top {} similar content:".format(k))
for i in range(k):
    st.write("{}: {} : {}".format(i + 1, sentences[I[0][i]], I[0][i]))

st.title('My first app')

st.write("Here's our first attempt at using data to create a table:")

df = pd.DataFrame({
    'first column': [1, 2, 3, 4],
    'second column': [10, 20, 30, 40]
})

st.write(df)

if st.checkbox('Show dataframe'):
    chart_data = pd.DataFrame(
       np.random.randn(20, 3),
       columns=['a', 'b', 'c'])

    chart_data
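    # A bare variable name like `chart_data` is rendered via Streamlit's "magic",
    # equivalent to st.write(chart_data); st.line_chart(chart_data) would plot it instead.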
    

option = st.selectbox(
    'Which number do you like best?',
    df['first column'])

st.write('You selected: ', option)

text1 = st.text('This is some text.')

if st.button('Say hello'):
    st.write('Why hello there')
else:
    st.write('Goodbye')


agree = st.checkbox('I agree')

if agree:
    st.write('Great!')
    
age = st.slider('How old are you?', 0, 130, 25)

st.write("I'm ", age, 'years old')

title = st.text_input('Movie title', 'Life of Brian')

st.write('The current movie title is', title)

number = st.number_input('Insert a number')

st.write('The current number is ', number)