peter2000 committed on
Commit
529d899
•
1 Parent(s): 5a34641

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -183
app.py CHANGED
@@ -1,188 +1,15 @@
1
- import streamlit as st
2
- st.set_page_config(f'SDSN x GIZ Policy Tracing', layout="wide")
3
-
4
- import seaborn as sns
5
- import pdfplumber
6
- from pandas import DataFrame
7
- import matplotlib.pyplot as plt
8
- import numpy as np
9
- import streamlit as st
10
- import sentence-transformers
11
-
12
-
13
-
14
- ##@st.cache(allow_output_mutation=True)
15
- def load_model():
16
- return KeyBERT()
17
-
18
- def read_(file):
19
- if file is not None:
20
- text = []
21
- with pdfplumber.open(file) as pdf:
22
- for page in pdf.pages:
23
- text.append(page.extract_text())
24
- text_str = ' '.join([page for page in text])
25
- st.write('Document:', pdf.metadata)
26
- st.write('Number of pages:',len(pdf.pages))
27
- pdf.close()
28
- return text_str
29
-
30
-
31
-
32
- st.sidebar.image(
33
- "https://github.com/gizdatalab/policy_tracing/blob/main/img/sdsn.png?raw=true",
34
- use_column_width=True
35
- )
36
- st.sidebar.markdown("## 📌 Step One: Upload document ")
37
-
38
- with st.sidebar:
39
- file = st.file_uploader('Upload PDF File', type=['pdf'])
40
-
41
- st.sidebar.title(
42
- "Options:"
43
- )
44
-
45
- st.sidebar.markdown(
46
- "You can freely browse the different chapters - ie example prompts from different people - and see the results."
47
- )
48
-
49
- selected_date = st.sidebar.selectbox(
50
- "Please select the chapter you want to read:",
51
- ['c1','c2']
52
- )
53
-
54
- with st.container():
55
- st.markdown("<h1 style='text-align: center; color: black;'> SDSN X GIZ - Policy Action Tracking</h1>", unsafe_allow_html=True)
56
- st.write(' ')
57
- st.write(' ')
58
-
59
- with st.expander("ℹ️ - About this app", expanded=True):
60
-
61
- st.write(
62
- """
63
- The *Policy Action Tracker* app is an easy-to-use interface built with Streamlit for analyzing policy documents - developed by GIZ Data and the Sustainable Development Solution Network.
64
-
65
- It uses a minimal keyword extraction technique that leverages multiple NLP embeddings and relies on [Transformers] (https://huggingface.co/transformers/) 🤗 to create keywords/keyphrases that are most similar to a document.
66
- """
67
- )
68
-
69
- st.markdown("")
70
- st.markdown("")
71
- #st.markdown("## 📌 Step One: Upload document ")
72
-
73
-
74
- with st.container():
75
- st.markdown("## 📌 Step One: Upload document ")
76
- ##file = st.file_uploader('Upload PDF File', type=['pdf'])
77
- text_str = read_(file)
78
-
79
-
80
- import seaborn as sns
81
- import pdfplumber
82
- from pandas import DataFrame
83
- from keybert import KeyBERT
84
- import matplotlib.pyplot as plt
85
- import numpy as np
86
  import streamlit as st
87
 
 
88
 
 
89
 
90
- @st.cache(allow_output_mutation=True)
91
- def load_model():
92
- return KeyBERT()
93
-
94
- kw_model = load_model()
95
-
96
- keywords = kw_model.extract_keywords(
97
- text_str,
98
- keyphrase_ngram_range=(1, 2),
99
- use_mmr=True,
100
- stop_words="english",
101
- top_n=10,
102
- diversity=0.7,
103
- )
104
-
105
- st.markdown("## 🎈 What is my document about?")
106
-
107
- df = (
108
- DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
109
- .sort_values(by="Relevancy", ascending=False)
110
- .reset_index(drop=True)
111
- )
112
-
113
- df.index += 1
114
-
115
- # Add styling
116
- cmGreen = sns.light_palette("green", as_cmap=True)
117
- cmRed = sns.light_palette("red", as_cmap=True)
118
- df = df.style.background_gradient(
119
- cmap=cmGreen,
120
- subset=[
121
- "Relevancy",
122
- ],
123
- )
124
- c1, c2, c3 = st.columns([1, 3, 1])
125
-
126
- format_dictionary = {
127
- "Relevancy": "{:.1%}",
128
- }
129
-
130
- df = df.format(format_dictionary)
131
-
132
- with c2:
133
- st.table(df)
134
-
135
- ######## SDG!
136
- from transformers import pipeline
137
-
138
- finetuned_checkpoint = "jonas/sdg_classifier_osdg"
139
- classifier = pipeline("text-classification", model=finetuned_checkpoint)
140
-
141
- word_list = text_str.split()
142
- len_word_list = len(word_list)
143
- par_list = []
144
- par_len = 130
145
- for i in range(0,len_word_list // par_len):
146
- string_part = ' '.join(word_list[i*par_len:(i+1)*par_len])
147
- par_list.append(string_part)
148
-
149
- labels = classifier(par_list)
150
- labels_= [(l['label'],l['score']) for l in labels]
151
- df = DataFrame(labels_, columns=["SDG", "Relevancy"])
152
- df['text'] = ['... '+par+' ...' for par in par_list]
153
- df = df.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
154
- df.index += 1
155
- df =df[df['Relevancy']>.9]
156
- x = df['SDG'].value_counts()
157
-
158
- plt.rcParams['font.size'] = 25
159
- colors = plt.get_cmap('Blues')(np.linspace(0.2, 0.7, len(x)))
160
- # plot
161
- fig, ax = plt.subplots()
162
- ax.pie(x, colors=colors, radius=2, center=(4, 4),
163
- wedgeprops={"linewidth": 1, "edgecolor": "white"}, frame=False,labels =list(x.index))
164
-
165
- st.markdown("## 🎈 Anything related to SDGs?")
166
-
167
- c4, c5, c6 = st.columns([5, 7, 1])
168
-
169
- # Add styling
170
- cmGreen = sns.light_palette("green", as_cmap=True)
171
- cmRed = sns.light_palette("red", as_cmap=True)
172
- df = df.style.background_gradient(
173
- cmap=cmGreen,
174
- subset=[
175
- "Relevancy",
176
- ],
177
- )
178
-
179
- format_dictionary = {
180
- "Relevancy": "{:.1%}",
181
- }
182
-
183
- df = df.format(format_dictionary)
184
 
185
- with c4:
186
- st.pyplot(fig)
187
- with c5:
188
- st.table(df)
 
1
+ import appStore.keyword_search as keyword_search
2
+ import appStore.sdg_analysis as sdg_analysis
3
+ # import appStore.check_site as check_site
4
+ from appStore.multiapp import MultiApp
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import streamlit as st
6
 
7
+ st.set_page_config(f'SDSN x GIZ Policy Tracing', layout="wide")
8
 
9
+ app = MultiApp()
10
 
11
+ app.add_app("Analyse Policy Document", sdg_analysis.app)
12
+ app.add_app("KeyWord Search", keyword_search.app)
13
+ # app.add_app("Check Coherence", check_site.app)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ app.run()