peter2000 committed on
Commit
dcbae7f
•
1 Parent(s): 980d263

Create new file

Browse files
Files changed (1) hide show
  1. apps/sdg.py +63 -0
apps/sdg.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit page: 3-D UMAP view of sentence embeddings of SDG-labelled texts.

The script downloads the OSDG community dataset (tab-separated CSV), keeps
only rows with high annotator agreement, embeds each text with a
sentence-transformers model, projects the embeddings to three dimensions with
UMAP, and renders them as a Plotly 3-D scatter plot coloured by SDG label.
"""
import os

import pandas as pd
import plotly.express as px
import streamlit as st
import umap.umap_ as umap
from sentence_transformers import SentenceTransformer

st.title("SDG Embedding Visualisation")

# Human-readable legend entry for each numeric value of the `sdg` column.
_lab_dict = {
    0: 'no_cat',
    1: 'SDG 1 - No poverty',
    2: 'SDG 2 - Zero hunger',
    3: 'SDG 3 - Good health and well-being',
    4: 'SDG 4 - Quality education',
    5: 'SDG 5 - Gender equality',
    6: 'SDG 6 - Clean water and sanitation',
    7: 'SDG 7 - Affordable and clean energy',
    8: 'SDG 8 - Decent work and economic growth',
    9: 'SDG 9 - Industry, Innovation and Infrastructure',
    10: 'SDG 10 - Reduced inequality',
    11: 'SDG 11 - Sustainable cities and communities',
    12: 'SDG 12 - Responsible consumption and production',
    13: 'SDG 13 - Climate action',
    14: 'SDG 14 - Life below water',
    15: 'SDG 15 - Life on land',
    16: 'SDG 16 - Peace, justice and strong institutions',
    17: 'SDG 17 - Partnership for the goals',
}

with st.spinner("👑 load language model (sentence transformer)"):
    model_name = 'sentence-transformers/all-MiniLM-L6-v2'
    model = SentenceTransformer(model_name)

with st.spinner("👑 load sdg data"):
    df_osdg = pd.read_csv(
        'https://zenodo.org/record/5550238/files/osdg-community-dataset-v21-09-30.csv',
        sep='\t',
    )
    # Keep only rows with agreement above 0.95 AND more than 3 positive
    # labels; one combined mask selects the same rows as filtering twice.
    df_osdg = df_osdg[
        (df_osdg['agreement'] > .95) & (df_osdg['labels_positive'] > 3)
    ]
    # For quick local experiments, subsample here, e.g. df_osdg[:1000].

    # A KeyError here means the data contains an `sdg` value that is missing
    # from _lab_dict.
    labels = [_lab_dict[lab] for lab in df_osdg['sdg']]
    docs = list(df_osdg['text'])
    docs_embeddings = model.encode(docs)

with st.spinner("👑 prepare visualisation"):
    n_neighbors = 15
    n_components = 3  # three output dimensions for the 3-D scatter plot
    random_state = 42  # fixed seed passed to UMAP
    umap_model = umap.UMAP(
        n_neighbors=n_neighbors,
        n_components=n_components,
        metric='cosine',
        random_state=random_state,
    )
    # fit_transform returns the embedding learned while fitting, so a separate
    # transform() call on the very same data is unnecessary.
    docs_umap = umap_model.fit_transform(docs_embeddings)

with st.spinner("👑 create visualisation"):
    # docs_umap is used as the data frame; x/y/z refer to its columns 0, 1, 2.
    fig = px.scatter_3d(
        docs_umap, x=0, y=1, z=2,
        color=labels,
        opacity=.5,
    )
    # Hide all three axes.
    fig.update_scenes(
        xaxis_visible=False, yaxis_visible=False, zaxis_visible=False
    )
    fig.update_traces(marker_size=4)
    st.plotly_chart(fig)