"""Streamlit front end for exploring Ancient Greek Word2Vec models.

The page offers four tabs: nearest neighbours, cosine similarity, a 3D
graph of a word's neighbourhood, and an LSJ dictionary lookup.
"""
# NOTE: the import order is kept exactly as it was, because the wildcard
# imports below may shadow one another's names.
import streamlit as st
from streamlit_option_menu import option_menu
from word2vec import *
import pandas as pd
from autocomplete import *
from vector_graph import *
from plots import *
from lsj_dict import *
import json
from streamlit_tags import st_tags, st_tags_sidebar

st.set_page_config(page_title="Ancient Greek Word2Vec", layout="centered")

# Names of the time slices offered in every model selector.
TIME_SLICES = ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"]

# Compressed word list used to populate the word pickers.
WORD_LIST_FILENAME = 'corpora/compass_filtered.pkl.gz'


def click_nn_button():
    """Toggle the session flag that controls the nearest-neighbour results.

    Used as the ``on_click`` callback of the "Find nearest neighbours"
    button. Each click flips the flag, so a second click hides the results.
    """
    st.session_state.nearest_neighbours = not st.session_state.nearest_neighbours


# Horizontal menu
active_tab = option_menu(
    None,
    ["Nearest neighbours", "Cosine similarity", "3D graph", 'Dictionary'],
    menu_icon="cast",
    default_index=0,
    orientation="horizontal",
)

# Prepare dictionary. The file is read as UTF-8 explicitly so the Greek text
# does not depend on the platform's default encoding, and the handle is
# closed as soon as the data is parsed.
with open('lsj_dict.json', 'r', encoding='utf-8') as lsj_file:
    lemma_dict = json.load(lsj_file)

# Nearest neighbours tab
if active_tab == "Nearest neighbours":
    st.write("### TO DO: add description of function")
    col1, col2 = st.columns(2)

    # Load the compressed word list
    all_words = load_compressed_word_list(WORD_LIST_FILENAME)

    # Until a word is chosen, every time slice is offered.
    eligible_models = list(TIME_SLICES)

    if 'nearest_neighbours' not in st.session_state:
        st.session_state.nearest_neighbours = False

    with st.container():
        # st.multiselect returns a list; unwrap the single selection if any.
        word = st.multiselect("Enter a word", all_words, max_selections=1)
        if len(word) > 0:
            word = word[0]

            # Check which models contain the word
            eligible_models = check_word_in_models(word)

        models = st.multiselect(
            "Select models to search for neighbours",
            eligible_models
        )
        n = st.slider("Number of neighbours", 1, 50, 15)

        st.button("Find nearest neighbours", on_click=click_nn_button)

    # If the button to calculate nearest neighbours is clicked
    if st.session_state.nearest_neighbours:

        # Check if all fields are filled in. The explicit comparison is kept
        # because the helper's return type is defined elsewhere.
        if validate_nearest_neighbours(word, n, models) == False:  # noqa: E712
            st.error('Please fill in all fields')
        else:
            # Rewrite models to list of all loaded models
            loaded_models = load_selected_models(models)

            nearest_neighbours = get_nearest_neighbours(word, n, loaded_models)

            # Create one dataframe per model and show it as a table
            all_dfs = []
            for model, neighbours in nearest_neighbours.items():
                st.write(f"### {model}")
                df = pd.DataFrame(
                    neighbours,
                    columns=['Word', 'Cosine Similarity']
                )
                all_dfs.append((model, df))
                st.table(df)

            # Store content in a temporary file
            tmp_file = store_df_in_temp_file(all_dfs)

            # Open the temporary file and read its content
            with open(tmp_file, "rb") as file:
                file_byte = file.read()

            # Create download button
            st.download_button(
                "Download results",
                data=file_byte,
                file_name=f'nearest_neighbours_{word}_TEST.xlsx',
                mime='application/octet-stream'
            )

# Cosine similarity tab
elif active_tab == "Cosine similarity":
    col1, col2 = st.columns(2)
    col3, col4 = st.columns(2)

    with st.container():
        with col1:
            word_1 = st.text_input("Enter a word", placeholder="πατήρ")

        with col2:
            time_slice_1 = st.selectbox("Time slice word 1", TIME_SLICES)

    with st.container():
        with col3:
            word_2 = st.text_input("Enter a word", placeholder="μήτηρ")

        with col4:
            time_slice_2 = st.selectbox("Time slice word 2", TIME_SLICES)

    # Run the calculation only on the rerun triggered by the button click
    if st.button("Calculate cosine similarity"):
        cosine_simularity_score = get_cosine_similarity(
            word_1, time_slice_1, word_2, time_slice_2
        )
        st.write(cosine_simularity_score)

# 3D graph tab
elif active_tab == "3D graph":
    col1, col2 = st.columns(2)

    # Load compressed word list
    all_words = load_compressed_word_list(WORD_LIST_FILENAME)

    with st.container():
        with col1:
            # st.multiselect returns a list; unwrap the single selection if any.
            word = st.multiselect("Enter a word", all_words, max_selections=1)
            if len(word) > 0:
                word = word[0]

        with col2:
            time_slice = st.selectbox("Time slice", TIME_SLICES)

        n = st.slider("Number of words", 1, 50, 15)

        graph_button = st.button("Create 3D graph")

    if graph_button:
        if not word:
            # Without a selection `word` is still an empty list, which the
            # vector helpers are not expected to handle.
            st.error('Please enter a word')
        else:
            time_slice_model = convert_time_name_to_model(time_slice)
            nearest_neighbours_vectors = get_nearest_neighbours_vectors(
                word, time_slice_model, n
            )
            st.dataframe(nearest_neighbours_vectors)

            fig, df = make_3d_plot4(
                nearest_neighbours_vectors, word, time_slice_model
            )
            st.dataframe(df)
            st.plotly_chart(fig)

# Dictionary tab
elif active_tab == "Dictionary":

    with st.container():
        all_lemmas = load_compressed_word_list('all_lemmas.pkl.gz')

        query_tag = st_tags(
            label='Search a word in the LSJ dictionary',
            text='',
            value=[],
            suggestions=all_lemmas,
            maxtags=1,
            key='1'
        )

        # If a word has been selected by user
        if query_tag:
            query = query_tag[0]
            st.write(f"### {query}")

            # Some words are capitalized in the dictionary, so try the typed
            # form first and the capitalized form second.
            for candidate in (query, query.capitalize()):
                if candidate in lemma_dict:
                    # Put text in readable format and display it
                    st.markdown(format_text(lemma_dict[candidate]))
                    break
            else:
                # Nothing to format: report the miss and stop here instead of
                # continuing with an undefined entry.
                st.error("Word not found in dictionary")