# Streamlit demo app.
#
# Part 1: semantic search over the lines of ``knowledge.txt`` using the
#         ``moka-ai/m3e-base`` sentence-embedding model and a FAISS
#         inner-product index.
# Part 2: the introductory Streamlit widget walkthrough (table, checkbox,
#         selectbox, button, slider, text and number inputs).
#
# Streamlit re-executes this whole script on every widget interaction, so
# the expensive objects (model, index) are cached with ``st.cache_resource``.
import os

import faiss
import numpy as np
import pandas as pd
import streamlit as st
from sentence_transformers import SentenceTransformer


@st.cache_resource
def load_model():
    """Load the sentence-embedding model once per server process."""
    return SentenceTransformer('moka-ai/m3e-base')


model = load_model()


def get_embedding(text_content):
    """Return the embedding produced by ``model.encode`` for *text_content*.

    *text_content* may be a single string or a list of strings; it is passed
    straight through to ``model.encode``.
    """
    return model.encode(text_content)


@st.cache_resource
def build_index(path, mtime):
    """Read the knowledge file and build a FAISS inner-product index over it.

    Args:
        path: Path of a UTF-8 text file holding one knowledge entry per line.
        mtime: Modification time of *path*. It is not used in the body; it is
            part of the cache key so that editing the file rebuilds the index.

    Returns:
        A ``(knowledge, index)`` tuple. ``knowledge`` is the list of
        non-blank, stripped lines; ``index`` is the FAISS index holding one
        vector per entry, or ``None`` when the file has no usable lines.
    """
    with open(path, 'r', encoding='utf-8') as file:
        # Blank lines carry no content; embedding them would only add
        # meaningless entries that can show up in the results.
        entries = [stripped for stripped in (line.strip() for line in file) if stripped]

    if not entries:
        return entries, None

    # Encode all entries in one batch call; FAISS expects float32 input.
    vectors = np.asarray(get_embedding(entries), dtype=np.float32)

    # NOTE: vectors are not normalised here, so scores are raw inner
    # products rather than cosine similarities.
    flat_index = faiss.IndexFlatIP(vectors.shape[1])
    flat_index.add(vectors)
    return entries, flat_index


# Load the text file as knowledge and index it.
knowledge_file = 'knowledge.txt'
knowledge, index = build_index(knowledge_file, os.path.getmtime(knowledge_file))

# Get user input for a question
question = st.text_input("Enter your question: ")

if index is None:
    st.warning("No knowledge entries found in {}.".format(knowledge_file))
elif question.strip():
    # Never ask for more neighbours than the index holds: FAISS pads missing
    # results with -1, which would silently index the last knowledge entry.
    k = min(5, len(knowledge))

    question_embedding = np.asarray([get_embedding(question)], dtype=np.float32)
    _scores, neighbours = index.search(question_embedding, k)

    # Display the results
    st.write(f"Top {k} similar content:")
    for rank, entry_id in enumerate(neighbours[0], start=1):
        st.write(f"{rank}: {knowledge[entry_id]}")

st.title('My first app')

st.write("Here's our first attempt at using data to create a table:")
df = pd.DataFrame({
    'first column': [1, 2, 3, 4],
    'second column': [10, 20, 30, 40],
})
st.write(df)

if st.checkbox('Show dataframe'):
    chart_data = pd.DataFrame(
        np.random.randn(20, 3),
        columns=['a', 'b', 'c'],
    )
    # Explicit write instead of a bare expression, so the table is shown
    # even when Streamlit's "magic" rendering is disabled.
    st.write(chart_data)

option = st.selectbox(
    'Which number do you like best?',
    df['first column'],
)
st.write('You selected: ', option)

st.text('This is some text.')

if st.button('Say hello'):
    st.write('Why hello there')
else:
    st.write('Goodbye')

agree = st.checkbox('I agree')
if agree:
    st.write('Great!')

age = st.slider('How old are you?', 0, 130, 25)
st.write("I'm ", age, 'years old')

title = st.text_input('Movie title', 'Life of Brian')
st.write('The current movie title is', title)

number = st.number_input('Insert a number')
st.write('The current number is ', number)