File size: 3,040 Bytes
69881c2
 
 
 
 
1a3c951
69881c2
3e8419b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d55e7f9
 
 
 
3e8419b
 
1a3c951
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69881c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import streamlit as st
# To make things easier later, we're also importing numpy and pandas for
# working with sample data.
import numpy as np
import pandas as pd
import faiss

from sentence_transformers import SentenceTransformer

# Streamlit re-executes this script from the top on every widget interaction,
# so the expensive model load is cached when the installed Streamlit provides
# ``st.cache_resource``; on older versions the loader simply runs uncached.
_cache_resource = getattr(st, 'cache_resource', None) or (lambda func: func)


@_cache_resource
def _load_model():
    """Return the ``moka-ai/m3e-base`` sentence-embedding model."""
    return SentenceTransformer('moka-ai/m3e-base')


model = _load_model()

# Sample sentences for manual experiments, e.g. ``model.encode(sentences)``.
# They are deliberately not encoded at import time: the result was never read
# (the name ``embeddings`` is reassigned further down before any use), so the
# call only slowed down every rerun of the app.
sentences = [
    '* Moka 此文本嵌入模型由 MokaAI 训练并开源,训练脚本使用 uniem',
    '* Massive 此文本嵌入模型通过**千万级**的中文句对数据集进行训练',
    '* Mixed 此文本嵌入模型支持中英双语的同质文本相似度计算,异质文本检索等功能,未来还会支持代码检索,ALL in one'
]


def get_embedding(text_content):
    """Encode ``text_content`` with the module-level sentence-transformer model.

    The argument is handed to ``model.encode`` unchanged, and whatever that
    call produces is returned as-is.
    """
    encoded = model.encode(text_content)
    return encoded

# Load the knowledge base: one entry per non-empty line of the text file.
knowledge_file = 'knowledge.txt'
with open(knowledge_file, 'r', encoding='utf-8') as file:
    # Blank lines carry no content and would otherwise be indexed and shown
    # as empty search results.
    knowledge = [entry for entry in (line.strip() for line in file) if entry]

# Embed every knowledge entry in a single batched call (the encoder accepts a
# list of texts) and force float32, the dtype Faiss works with.
if knowledge:
    embeddings = np.ascontiguousarray(get_embedding(knowledge), dtype=np.float32)
else:
    # Encode one probe string only to learn the embedding width, then keep
    # zero rows so an empty knowledge file still yields a valid, empty index.
    embeddings = np.ascontiguousarray(get_embedding(['']), dtype=np.float32)[:0]

# Create the index with Inner Product (IP) as the similarity measure.
# The dimension is taken from the embeddings themselves: a hard-coded value
# that does not match the model's output width makes ``index.add`` fail.
index = faiss.IndexFlatIP(embeddings.shape[1])
if len(embeddings):
    index.add(embeddings)

# Get user input for a question
question = st.text_input("Enter your question: ")

k = 5  # Maximum number of results to retrieve

# The text box is empty until the user types something, so only search when
# there is an actual question instead of ranking results for an empty string.
if question.strip():
    # Faiss expects a 2-D float32 array: one row per query vector.
    query = np.asarray(get_embedding(question), dtype=np.float32).reshape(1, -1)

    # Never ask for more neighbours than the index holds; Faiss pads missing
    # results with -1, which would silently index the last knowledge entry.
    top_k = min(k, index.ntotal)
    if top_k > 0:
        D, I = index.search(query, top_k)
        hits = [hit for hit in I[0] if hit >= 0]

        # Display the results
        st.write("Top {} similar content:".format(len(hits)))
        for rank, hit in enumerate(hits, start=1):
            st.write("{}: {}".format(rank, knowledge[hit]))
    else:
        st.write("No knowledge entries are available to search.")

st.title('My first app')

st.write("Here's our first attempt at using data to create a table:")

# Small two-column table shared by the table display and the select box below.
table_columns = {
    'first column': [1, 2, 3, 4],
    'second column': [10, 20, 30, 40],
}
df = pd.DataFrame(table_columns)
st.write(df)

# Optional 20x3 frame of random normal samples, shown only when ticked.
if st.checkbox('Show dataframe'):
    chart_data = pd.DataFrame(np.random.randn(20, 3), columns=list('abc'))

    # Bare expression on purpose: left for Streamlit's "magic" rendering.
    chart_data

# Let the user pick one of the values from the first column.
option = st.selectbox('Which number do you like best?', df['first column'])
st.write('You selected: ', option)

text1 = st.text('This is some text.')

# The button is truthy only on the rerun triggered by the click itself.
greeting = 'Why hello there' if st.button('Say hello') else 'Goodbye'
st.write(greeting)

agree = st.checkbox('I agree')
if agree:
    st.write('Great!')

# Slider from 0 to 130, starting at 25.
age = st.slider('How old are you?', min_value=0, max_value=130, value=25)
st.write("I'm ", age, 'years old')

title = st.text_input('Movie title', value='Life of Brian')
st.write('The current movie title is', title)

number = st.number_input('Insert a number')
st.write('The current number is ', number)