Spaces:
Running
Running
File size: 2,774 Bytes
dcbae7f 76ddf65 d3d9b79 85f22bf d3d9b79 5f18822 d3d9b79 85f22bf d3d9b79 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import plotly.express as px
import streamlit as st
from sentence_transformers import SentenceTransformer
import umap.umap_ as umap
import pandas as pd
import os
def app():
st.title("SDG Embedding Visualisation")
with st.spinner("๐ load language model (sentence transformer)"):
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
model = SentenceTransformer(model_name)
with st.spinner("๐ load and embed SDG texts"):
df_osdg = pd.read_csv('https://zenodo.org/record/5550238/files/osdg-community-dataset-v21-09-30.csv',sep='\t')
df_osdg = df_osdg[df_osdg['agreement']>.95]
df_osdg = df_osdg[df_osdg['labels_positive']>4]
#df_osdg = df_osdg[:1000]
_lab_dict = {0: 'no_cat',
1:'SDG 1 - No poverty',
2:'SDG 2 - Zero hunger',
3:'SDG 3 - Good health and well-being',
4:'SDG 4 - Quality education',
5:'SDG 5 - Gender equality',
6:'SDG 6 - Clean water and sanitation',
7:'SDG 7 - Affordable and clean energy',
8:'SDG 8 - Decent work and economic growth',
9:'SDG 9 - Industry, Innovation and Infrastructure',
10:'SDG 10 - Reduced inequality',
11:'SDG 11 - Sustainable cities and communities',
12:'SDG 12 - Responsible consumption and production',
13:'SDG 13 - Climate action',
14:'SDG 14 - Life below water',
15:'SDG 15 - Life on land',
16:'SDG 16 - Peace, justice and strong institutions',
17:'SDG 17 - Partnership for the goals',}
labels = [_lab_dict[lab] for lab in df_osdg['sdg'] ]
#keys = list(df_osdg['keys'])
docs = list(df_osdg['text'])
docs_embeddings = model.encode(docs)
with st.spinner("๐ map to 3D for visualisation"):
n_neighbors = 15
n_components = 3
random_state =42
umap_model = (umap.UMAP(n_neighbors=n_neighbors,
n_components=n_components,
metric='cosine',
random_state=random_state)
.fit(docs_embeddings))
docs_umap = umap_model.transform(docs_embeddings)
with st.spinner("๐ create visualisation"):
fig = px.scatter_3d(
docs_umap, x=0, y=1, z=2,
color=labels,
opacity = .5)#, hover_data=[keys])
fig.update_scenes(xaxis_visible=False, yaxis_visible=False,zaxis_visible=False )
fig.update_traces(marker_size=4)
st.plotly_chart(fig) |