"""Streamlit word-guessing game ranked by semantic similarity.

The player types words; each new guess is scored by the cosine similarity
between its sentence-transformer embedding and the embedding of a secret
word. All guesses made in the current session are shown in a table sorted
from most to least similar.
"""

import streamlit as st
import pandas as pd
from sentence_transformers import SentenceTransformer, util
from datasets import load_dataset


@st.cache_resource
def load_model():
    """Return the multilingual MiniLM sentence-embedding model.

    Wrapped in ``st.cache_resource`` so the model object is created once and
    reused across Streamlit reruns instead of being reloaded every time.
    """
    return SentenceTransformer(
        'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'
    )


@st.cache_data
def _encode_secret(secret):
    """Return the embedding of ``secret``, cached across reruns.

    Streamlit re-executes the whole script on every interaction; caching
    avoids re-encoding the same secret word each time.
    """
    return load_model().encode(secret)


@st.cache_data
def load_words_dataset():
    """Return the ``Word`` column of the WordNet definitions train split.

    The column is materialised with ``list(...)`` so the cached value is a
    plain list of words.
    NOTE(review): newer ``datasets`` releases may return a lazy column
    object here rather than a list - confirm against the installed version.
    """
    dataset = load_dataset(
        "marksverdhei/wordnet-definitions-en-2021", split="train"
    )
    return list(dataset["Word"])


model = load_model()

secret_word = "нос"
secred_embedding = _encode_secret(secret_word)

# Guesses persist in the session state as (word, similarity) pairs.
if 'words' not in st.session_state:
    st.session_state['words'] = []

st.write('Try to guess a secret word by semantic similarity')

word = st.text_input("Input a word")

# A set gives O(1) duplicate checks on previously guessed words.
used_words = {w for w, s in st.session_state['words']}

# The button only has to trigger a rerun: whether a guess is scored depends
# solely on the text box holding a new, non-empty word. Previously, clicking
# the button with an empty box recorded an empty-string guess.
st.button("Guess")

# Strip surrounding whitespace so "word" and " word" are the same guess.
guess = word.strip()
if guess and guess not in used_words:
    word_embedding = model.encode(guess)
    # pytorch_cos_sim returns a 1x1 tensor for two single vectors;
    # pull out the scalar value.
    similarity = util.pytorch_cos_sim(
        secred_embedding, word_embedding
    ).cpu().numpy()[0][0]
    st.session_state['words'].append((guess, similarity))

# Show every guess so far, best match first.
words_df = pd.DataFrame(
    st.session_state['words'],
    columns=["word", "similarity"],
).sort_values(by=["similarity"], ascending=False)
st.dataframe(words_df)

all_words = load_words_dataset()
# NOTE(review): this writes the whole word list to the page; it looks like
# exploratory output - confirm whether it should stay.
st.write(all_words)