import streamlit as st
# To make things easier later, we're also importing numpy and pandas for
# working with sample data.
import numpy as np
import pandas as pd
import faiss

from sentence_transformers import SentenceTransformer

@st.cache_resource
def _load_model():
    """Load the sentence-embedding model once and reuse it across reruns.

    Streamlit re-executes the whole script on every widget interaction;
    ``st.cache_resource`` keeps the loaded model so it is not re-instantiated
    each time.

    Returns:
        The ``SentenceTransformer`` instance for ``moka-ai/m3e-base``.
    """
    return SentenceTransformer('moka-ai/m3e-base')


model = _load_model()

# Sample sentences for trying the encoder by hand, e.g.
# ``model.encode(sentences)``. They are not encoded at start-up because
# nothing in the script reads the result.
sentences = [
    '* Moka 此文本嵌入模型由 MokaAI 训练并开源，训练脚本使用 uniem',
    '* Massive 此文本嵌入模型通过**千万级**的中文句对数据集进行训练',
    '* Mixed 此文本嵌入模型支持中英双语的同质文本相似度计算，异质文本检索等功能，未来还会支持代码检索，ALL in one'
]


def get_embedding(text_content):
    """Encode *text_content* with the module-level ``model``.

    Args:
        text_content: The text to embed; passed unchanged to
            ``model.encode``.

    Returns:
        Whatever ``model.encode`` returns for that input.
    """
    vector = model.encode(text_content)
    return vector

# Load the text file as knowledge: one entry per non-blank line.
# Blank lines are skipped so that empty strings are never indexed or
# returned as search hits.
knowledge_file = 'knowledge.txt'
with open(knowledge_file, 'r', encoding='utf-8') as file:
    knowledge = [entry for entry in (line.strip() for line in file) if entry]


@st.cache_resource
def _build_index(texts):
    """Embed *texts* and build a flat inner-product FAISS index over them.

    The result is cached on the content of *texts*, so Streamlit reruns
    triggered by widget interaction do not re-embed the knowledge base.

    Args:
        texts: Non-empty tuple of knowledge strings.

    Returns:
        A ``(embeddings, index)`` pair: the float32 embedding matrix and
        the ``faiss.IndexFlatIP`` holding those vectors in the same order.
    """
    # Encode the whole batch in one call instead of one text at a time.
    vectors = np.asarray(get_embedding(list(texts)), dtype=np.float32)
    # NOTE(review): inner product only equals cosine similarity when the
    # vectors are L2-normalised -- confirm whether the model normalises
    # its output, otherwise ranking is influenced by vector length.
    flat_index = faiss.IndexFlatIP(vectors.shape[1])
    flat_index.add(vectors)
    return vectors, flat_index


if knowledge:
    embeddings, index = _build_index(tuple(knowledge))
else:
    # Nothing to index: an empty matrix has no second dimension to size
    # the index with, so skip index creation instead of crashing.
    embeddings, index = None, None
    st.warning("The knowledge file '{}' contains no text.".format(knowledge_file))

# Get user input for a question
question = st.text_input("Enter your question: ")

# Never ask for more neighbours than there are entries: FAISS pads missing
# results with the label -1, which would silently index the last entry.
k = min(5, len(knowledge))

# Only search once the user has typed something and an index exists.
if index is not None and question.strip():
    question_embedding = get_embedding(question)
    query = np.asarray([question_embedding], dtype=np.float32)
    D, I = index.search(query, k)

    # Display the results
    st.write("Top {} similar content:".format(k))
    for rank, idx in enumerate(I[0], start=1):
        if idx < 0:
            # Defensive: -1 marks "no result" in FAISS output.
            continue
        st.write("{}: {}".format(rank, knowledge[idx]))

# --- Streamlit tutorial widgets ------------------------------------------
st.title('My first app')

# Static table built from a small hand-written DataFrame.
st.write("Here's our first attempt at using data to create a table:")
df = pd.DataFrame(
    {
        'first column': [1, 2, 3, 4],
        'second column': [10, 20, 30, 40],
    }
)
st.write(df)

# Optional table of random data, shown only while the box is ticked.
if st.checkbox('Show dataframe'):
    random_frame = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c'])
    # A bare expression on its own line is rendered by Streamlit "magic".
    random_frame

# Drop-down populated from the first column of the table above.
option = st.selectbox('Which number do you like best?', df['first column'])
st.write('You selected: ', option)

text1 = st.text('This is some text.')

# The button is True only on the rerun triggered by the click.
st.write('Why hello there' if st.button('Say hello') else 'Goodbye')

agree = st.checkbox('I agree')
if agree:
    st.write('Great!')

# Slider: minimum 0, maximum 130, initial value 25.
age = st.slider('How old are you?', 0, 130, 25)
st.write("I'm ", age, 'years old')

# Free-text input with a pre-filled default value.
title = st.text_input('Movie title', 'Life of Brian')
st.write('The current movie title is', title)

number = st.number_input('Insert a number')
st.write('The current number is ', number)