import streamlit as st
# To make things easier later, we're also importing numpy and pandas for
# working with sample data.
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('moka-ai/m3e-base')
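# m3e-base is MokaAI's bilingual (Chinese/English) text-embedding model; the
# weights are downloaded from the Hugging Face Hub on first use.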

# The sentences we'd like to encode (Chinese descriptions of the m3e model)
sentences = [
    '* Moka 此文本嵌入模型由 MokaAI 训练并开源,训练脚本使用 uniem',
    '* Massive 此文本嵌入模型通过**千万级**的中文句对数据集进行训练',
    '* Mixed 此文本嵌入模型支持中英双语的同质文本相似度计算,异质文本检索等功能,未来还会支持代码检索,ALL in one'
]

# Sentences are encoded by calling model.encode()
embeddings = model.encode(sentences)
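# model.encode returns a NumPy float32 array of shape (len(sentences), embedding_dim)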

# Print the embeddings
for sentence, embedding in zip(sentences, embeddings):
    print("Sentence:", sentence)
    print("Embedding:", embedding)
    print("")


import faiss
d = embeddings.shape[1]  # Dimension of the embeddings
index = faiss.IndexFlatIP(d)  # Index that uses inner product (dot product) similarity
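# Note: IndexFlatIP ranks by raw dot product; for cosine similarity, L2-normalize
# the embeddings first (faiss.normalize_L2) and do the same for query embeddings.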

# Add the embeddings to the index
index.add(embeddings)

# Search for similar documents
query = "训练脚本."
input_ids = tokenizer.encode(query, return_tensors="pt")
with torch.no_grad():
    query_embedding = model(input_ids)[0][0].numpy()
k = 2  # Number of similar documents to retrieve
D, I = index.search(query_embedding.reshape(1, -1), k)

# Print the results
st.write(f"Query: {query}")
for i in range(k):
    st.write(f"Rank {i+1}: {texts[I[0][i]]} (similarity score: {D[0][i]})")

    
# Search the index again for the most similar content, reusing the query embedding
k = min(5, index.ntotal)  # Number of results to retrieve, capped at the index size
D, I = index.search(query_embedding, k)

# Display the results
st.write("Top {} similar content:".format(k))
for i in range(k):
    st.write("{}: {} : {}".format(i + 1, sentences[I[0][i]], I[0][i]))

st.title('My first app')

st.write("Here's our first attempt at using data to create a table:")

df = pd.DataFrame({
    'first column': [1, 2, 3, 4],
    'second column': [10, 20, 30, 40]
})

st.write(df)

if st.checkbox('Show dataframe'):
    chart_data = pd.DataFrame(
       np.random.randn(20, 3),
       columns=['a', 'b', 'c'])

    chart_data
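    # A bare variable name like `chart_data` is rendered via Streamlit's "magic",
    # equivalent to st.write(chart_data); st.line_chart(chart_data) would plot it instead.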
    

option = st.selectbox(
    'Which number do you like best?',
    df['first column'])

st.write('You selected: ', option)

text1 = st.text('This is some text.')

if st.button('Say hello'):
    st.write('Why hello there')
else:
    st.write('Goodbye')


agree = st.checkbox('I agree')

if agree:
    st.write('Great!')
    
age = st.slider('How old are you?', 0, 130, 25)

st.write("I'm ", age, 'years old')

title = st.text_input('Movie title', 'Life of Brian')

st.write('The current movie title is', title)

number = st.number_input('Insert a number')

st.write('The current number is ', number)