Spaces:
Sleeping
Sleeping
File size: 3,040 Bytes
69881c2 1a3c951 69881c2 3e8419b d55e7f9 3e8419b 1a3c951 69881c2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
import streamlit as st
# To make things easier later, we're also importing numpy and pandas for
# working with sample data.
import numpy as np
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
@st.cache_resource
def _load_model():
    """Return the shared sentence-embedding model.

    Streamlit re-executes this script on every widget interaction, so the
    model is built through ``st.cache_resource`` to avoid re-instantiating
    it on each rerun.
    """
    return SentenceTransformer('moka-ai/m3e-base')


model = _load_model()

# Sample sentences kept as ready-made inputs for trying out model.encode().
sentences = [
    '* Moka 此文本嵌入模型由 MokaAI 训练并开源,训练脚本使用 uniem',
    '* Massive 此文本嵌入模型通过**千万级**的中文句对数据集进行训练',
    '* Mixed 此文本嵌入模型支持中英双语的同质文本相似度计算,异质文本检索等功能,未来还会支持代码检索,ALL in one',
]
def get_embedding(text_content):
    """Encode ``text_content`` with the module-level ``model``.

    The argument is handed to ``model.encode`` unchanged, and whatever that
    call produces is returned to the caller as-is.
    """
    encoded = model.encode(text_content)
    return encoded
# Load the knowledge base: one passage per line of a UTF-8 text file.
knowledge_file = 'knowledge.txt'
with open(knowledge_file, 'r', encoding='utf-8') as file:
    # Blank lines carry no content, so they are skipped instead of indexed.
    knowledge = [line.strip() for line in file if line.strip()]

if knowledge:
    # Encode all passages in one batched call; FAISS expects float32 input.
    embeddings = np.asarray(get_embedding(knowledge), dtype=np.float32)
    # Size the index from the embeddings themselves: a hard-coded width only
    # works when the model happens to emit vectors of exactly that size.
    index = faiss.IndexFlatIP(embeddings.shape[1])  # inner-product similarity
    index.add(embeddings)
else:
    # Nothing to index; keep the names defined and tell the user why the
    # search below stays inactive.
    embeddings = np.empty((0, 0), dtype=np.float32)
    index = None
    st.warning("No knowledge entries were found in {}.".format(knowledge_file))

# Get user input for a question
question = st.text_input("Enter your question: ")

k = 5  # Maximum number of results to retrieve

# Only search once there is something to search for and something to search in.
if index is not None and question.strip():
    question_embedding = np.asarray(get_embedding(question), dtype=np.float32)

    # Never request more neighbours than there are indexed passages; FAISS
    # fills the missing slots with -1, which would index from the list's end.
    top_k = min(k, index.ntotal)
    D, I = index.search(question_embedding.reshape(1, -1), top_k)

    # Display the results, best match first.
    st.write("Top {} similar content:".format(top_k))
    for rank, passage_idx in enumerate(I[0], start=1):
        st.write("{}: {}".format(rank, knowledge[passage_idx]))
# ---------------------------------------------------------------------------
# Streamlit widget walkthrough: a static table, an optional random table,
# and a handful of input widgets whose current values are echoed back.
# ---------------------------------------------------------------------------
st.title('My first app')

st.write("Here's our first attempt at using data to create a table:")
df = pd.DataFrame(
    {
        'first column': [1, 2, 3, 4],
        'second column': [10, 20, 30, 40],
    }
)
st.write(df)

# Random 20x3 table, built only while the checkbox is ticked.
if st.checkbox('Show dataframe'):
    chart_data = pd.DataFrame(np.random.randn(20, 3), columns=list('abc'))
    # Bare expression on purpose: relies on Streamlit's "magic" rendering.
    chart_data

# Offer the values of the first column as the selectable options.
option = st.selectbox('Which number do you like best?', df['first column'])
st.write('You selected: ', option)

text1 = st.text('This is some text.')

# Greet when the button call returns a truthy value, say goodbye otherwise.
st.write('Why hello there' if st.button('Say hello') else 'Goodbye')

agree = st.checkbox('I agree')
if agree:
    st.write('Great!')

# Slider arguments after the label: 0, 130, 25.
age = st.slider('How old are you?', 0, 130, 25)
st.write("I'm ", age, 'years old')

title = st.text_input('Movie title', 'Life of Brian')
st.write('The current movie title is', title)

number = st.number_input('Insert a number')
st.write('The current number is ', number)
|