"""Streamlit page that shows pre-computed SDG text embeddings as a 3D scatter plot."""

import os
from typing import Optional

import pandas as pd
import plotly.express as px
import streamlit as st
# NOTE(review): `os`, `umap` and `SentenceTransformer` are not referenced by
# the code in this module; they are kept in case something else relies on them.
import umap.umap_ as umap
from sentence_transformers import SentenceTransformer

# Pipe-separated file read by `app()`; the code below uses its columns
# `coord_x`, `coord_y`, `coord_z`, `labels` and `keys`.
_DATA_PATH = "sdg_umap.csv"
_DATA_SEP = "|"

# Introductory text shown in the "About this app" expander.
_ABOUT_TEXT = (
    " Information cartography - Get your word/phrase/sentence/paragraph"
    " embedded and visualized."
    ' The (English) sentence-transformers model "all-MiniLM-L6-v2" maps'
    " sentences & paragraphs to a 384 dimensional dense vector space"
    " This is normally used for tasks like clustering or semantic search,"
    " but in this case, we use it to place your text to a 3D map."
    " Before plotting, the dimension needs to be reduced to three so we can"
    " actually plot it, but preserve as much information as possible."
    " For this, we use a technology called umap."
    " On this page, you find thousands of text excerpts that were labelled"
    " by the community volunteers with respect to Sustainable Development"
    " Goals, a project by OSDG.ai, embedded as described."
    " Ready to explore. "
)


def _build_figure(df: pd.DataFrame, color: Optional[str] = None):
    """Return the 3D scatter figure for *df*.

    Args:
        df: Frame providing the `coord_x`, `coord_y`, `coord_z` and `keys`
            columns (plus the column named by *color*, if given).
        color: Name of the column used to colour the points, or ``None``
            to draw every point in the same colour.

    Returns:
        The configured plotly figure.
    """
    fig = px.scatter_3d(
        df,
        x="coord_x",
        y="coord_y",
        z="coord_z",
        color=color,
        opacity=0.5,
        # Reference the column by name so the tooltip entry is labelled
        # `keys` instead of carrying a generated name for an anonymous list.
        hover_data=["keys"],
    )
    # The axes carry no interpretable meaning here, so hide them.
    fig.update_scenes(
        xaxis_visible=False,
        yaxis_visible=False,
        zaxis_visible=False,
    )
    fig.update_traces(marker_size=4)
    return fig


def app() -> None:
    """Render the "SDG Embedding Visualisation" page.

    Shows an explanatory expander, reads the CSV at `_DATA_PATH`, and draws
    a 3D scatter plot of its points. The 'add labels' checkbox toggles
    colouring the points by the `labels` column.
    """
    st.title("SDG Embedding Visualisation")

    with st.expander("ℹī¸ - About this app", expanded=True):
        st.write(_ABOUT_TEXT)

    with st.spinner("👑 load data"):
        df_osdg = pd.read_csv(_DATA_PATH, sep=_DATA_SEP)

    show_labels = st.checkbox('add labels')

    # Both checkbox states draw the same figure; only the colouring differs.
    with st.spinner("👑 create visualisation"):
        fig = _build_figure(df_osdg, color="labels" if show_labels else None)
        st.plotly_chart(fig)