# Streamlit front end for the Ancient Greek Word2Vec models.
#
# The page shows a horizontal menu with four tabs (nearest neighbours, cosine
# similarity, 3D graph, dictionary). Streamlit re-runs this script on every
# interaction, so each run renders only the currently selected tab.
#
# NOTE(review): the star imports are kept in their original order because a
# later star import may shadow names from an earlier one; the helpers used
# below (load_compressed_word_list, validate_nearest_neighbours, ...) come
# from these project modules.
import streamlit as st
from streamlit_option_menu import option_menu
from word2vec import *
import pandas as pd
from autocomplete import *
from vector_graph import *
from plots import *

st.set_page_config(page_title="Ancient Greek Word2Vec", layout="centered")

# Display names of the time slices, shared by every selector in the app.
_TIME_SLICES = ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"]

# Compressed word list that feeds the word pickers.
_WORD_LIST_PATH = 'corpora/compass_filtered.pkl.gz'

# Display names whose model prefix is not simply the lower-cased display name.
_MODEL_PREFIXES = {
    'Hellenistic': 'hellen',
    'Early Roman': 'early_roman',
    'Late Roman': 'late_roman',
}


def _pick_word(all_words):
    """Render the single-word picker and return the user's choice.

    Returns the selected word, or the empty list produced by
    ``st.multiselect`` when nothing is selected, so callers and validators
    keep receiving the same "empty" value the script has always passed on.
    """
    selection = st.multiselect("Enter a word", all_words, max_selections=1)
    return selection[0] if selection else selection


def _model_name(time_slice):
    """Rewrite a time-slice display name to a model name (Archaic -> archaic_cbow)."""
    # NOTE(review): the 3D graph tab uses convert_time_name_to_model() for what
    # looks like the same job; confirm both produce identical names before
    # merging the two code paths.
    return _MODEL_PREFIXES.get(time_slice, time_slice).lower() + "_cbow"


def _render_nearest_neighbours_tab():
    """Render the tab that lists the nearest neighbours of a word."""
    st.write("### TO DO: add description of function")
    col1, col2 = st.columns(2)

    all_words = load_compressed_word_list(_WORD_LIST_PATH)

    with st.container():
        with col1:
            word = _pick_word(all_words)

        with col2:
            time_slice = st.selectbox("Time slice", _TIME_SLICES)

        models = st.multiselect("Select models to search for neighbours", _TIME_SLICES)
        n = st.slider("Number of neighbours", 1, 50, 15)

        # Nothing more to do until the user asks for the calculation.
        if not st.button("Find nearest neighbours"):
            return

        model_name = _model_name(time_slice)

        # Check if all fields are filled in. The explicit comparison is kept on
        # purpose: the validator's return contract is not visible from this
        # file, and `not result` would behave differently if it ever returns
        # None.
        if validate_nearest_neighbours(word, model_name, n, models) == False:  # noqa: E712
            st.error('Please fill in all fields')
            return

        # Rewrite the selected display names to the loaded models.
        loaded_models = load_selected_models(models)
        nearest_neighbours = get_nearest_neighbours(word, model_name, n, loaded_models)

        df = pd.DataFrame(
            nearest_neighbours,
            columns=["Word", "Time slice", "Similarity"],
            # Number the rows from 1 so the table reads as a ranking.
            index=range(1, len(nearest_neighbours) + 1),
        )
        st.table(df)

        # Store the table in a temporary file and read it back as bytes so it
        # can be offered as a download.
        tmp_file = store_df_in_temp_file(df)
        with open(tmp_file, "rb") as file:
            file_bytes = file.read()

        st.download_button(
            "Download results",
            data=file_bytes,
            file_name=f'nearest_neighbours_{word}_{model_name}.xlsx',
            mime='application/octet-stream',
        )


def _render_cosine_similarity_tab():
    """Render the tab that compares two words, each in its own time slice."""
    col1, col2 = st.columns(2)
    col3, col4 = st.columns(2)

    with st.container():
        with col1:
            word_1 = st.text_input("Enter a word", placeholder="πατήρ")
        with col2:
            time_slice_1 = st.selectbox("Time slice word 1", _TIME_SLICES)

    with st.container():
        with col3:
            word_2 = st.text_input("Enter a word", placeholder="μήτηρ")
        with col4:
            time_slice_2 = st.selectbox("Time slice word 2", _TIME_SLICES)

    # Only calculate once the user clicks the button.
    if st.button("Calculate cosine similarity"):
        # NOTE(review): the words and the time-slice display names are passed
        # on unchanged (possibly empty strings); presumably
        # get_cosine_similarity handles both - confirm.
        cosine_similarity_score = get_cosine_similarity(word_1, time_slice_1, word_2, time_slice_2)
        st.write(cosine_similarity_score)


def _render_graph_tab():
    """Render the tab that plots a word and its nearest neighbours in 3D."""
    col1, col2 = st.columns(2)

    all_words = load_compressed_word_list(_WORD_LIST_PATH)

    with st.container():
        with col1:
            word = _pick_word(all_words)

        with col2:
            time_slice = st.selectbox("Time slice", _TIME_SLICES)

        n = st.slider("Number of words", 1, 50, 15)

        if not st.button("Create 3D graph"):
            return

        # Without a selection `word` is still an empty list; report it the
        # same way the nearest neighbours tab does instead of handing the
        # empty list to the vector helpers.
        if not word:
            st.error('Please fill in all fields')
            return

        time_slice_model = convert_time_name_to_model(time_slice)
        nearest_neighbours_vectors = get_nearest_neighbours_vectors(word, time_slice_model, n)
        st.dataframe(nearest_neighbours_vectors)

        fig, df = make_3d_plot4(nearest_neighbours_vectors, word, time_slice_model)
        st.dataframe(df)
        st.plotly_chart(fig)


def _render_dictionary_tab():
    """Render the placeholder content of the dictionary tab."""
    with st.container():
        st.write("Dictionary tab")


# Menu label -> function that renders the tab; dict order is the menu order.
_TAB_RENDERERS = {
    "Nearest neighbours": _render_nearest_neighbours_tab,
    "Cosine similarity": _render_cosine_similarity_tab,
    "3D graph": _render_graph_tab,
    'Dictionary': _render_dictionary_tab,
}

# Horizontal menu
active_tab = option_menu(
    None,
    list(_TAB_RENDERERS),
    menu_icon="cast",
    default_index=0,
    orientation="horizontal",
)

# Render the selected tab; an unknown value renders nothing, as before.
render_tab = _TAB_RENDERERS.get(active_tab)
if render_tab is not None:
    render_tab()