peter2000 committed on
Commit
d162bf7
•
1 Parent(s): 11cb408

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -3
app.py CHANGED
@@ -4,10 +4,10 @@ st.set_page_config(f'SDSN x GIZ Policy Tracing', layout="wide")
4
  import seaborn as sns
5
  import pdfplumber
6
  from pandas import DataFrame
7
- from keybert import KeyBERT
8
  import matplotlib.pyplot as plt
9
  import numpy as np
10
  import streamlit as st
 
11
 
12
 
13
 
@@ -68,11 +68,121 @@ with st.expander("ℹ️ - About this app", expanded=True):
68
 
69
  st.markdown("")
70
  st.markdown("")
71
- st.markdown("## 📌 Step One: Upload document ")
72
 
73
 
74
  with st.container():
75
  st.markdown("## 📌 Step One: Upload document ")
76
  ##file = st.file_uploader('Upload PDF File', type=['pdf'])
77
  text_str = read_(file)
78
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import seaborn as sns
5
  import pdfplumber
6
  from pandas import DataFrame
 
7
  import matplotlib.pyplot as plt
8
  import numpy as np
9
  import streamlit as st
10
+ import sentence-transformers
11
 
12
 
13
 
 
68
 
69
  st.markdown("")
70
  st.markdown("")
71
+ #st.markdown("## 📌 Step One: Upload document ")
72
 
73
 
74
  with st.container():
75
  st.markdown("## 📌 Step One: Upload document ")
76
  ##file = st.file_uploader('Upload PDF File', type=['pdf'])
77
  text_str = read_(file)
78
+
79
+
80
+ import seaborn as sns
81
+ import pdfplumber
82
+ from pandas import DataFrame
83
+ from keybert import KeyBERT
84
+ import matplotlib.pyplot as plt
85
+ import numpy as np
86
+ import streamlit as st
87
+
88
+
89
+
90
+ @st.cache(allow_output_mutation=True)
91
+ def load_model():
92
+ return KeyBERT()
93
+
94
+ kw_model = load_model()
95
+
96
+ keywords = kw_model.extract_keywords(
97
+ text_str,
98
+ keyphrase_ngram_range=(1, 2),
99
+ use_mmr=True,
100
+ stop_words="english",
101
+ top_n=10,
102
+ diversity=0.7,
103
+ )
104
+
105
+ st.markdown("## 🎈 What is my document about?")
106
+
107
+ df = (
108
+ DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
109
+ .sort_values(by="Relevancy", ascending=False)
110
+ .reset_index(drop=True)
111
+ )
112
+
113
+ df.index += 1
114
+
115
+ # Add styling
116
+ cmGreen = sns.light_palette("green", as_cmap=True)
117
+ cmRed = sns.light_palette("red", as_cmap=True)
118
+ df = df.style.background_gradient(
119
+ cmap=cmGreen,
120
+ subset=[
121
+ "Relevancy",
122
+ ],
123
+ )
124
+ c1, c2, c3 = st.columns([1, 3, 1])
125
+
126
+ format_dictionary = {
127
+ "Relevancy": "{:.1%}",
128
+ }
129
+
130
+ df = df.format(format_dictionary)
131
+
132
+ with c2:
133
+ st.table(df)
134
+
135
+ ######## SDG!
136
+ from transformers import pipeline
137
+
138
+ finetuned_checkpoint = "jonas/sdg_classifier_osdg"
139
+ classifier = pipeline("text-classification", model=finetuned_checkpoint)
140
+
141
+ word_list = text_str.split()
142
+ len_word_list = len(word_list)
143
+ par_list = []
144
+ par_len = 130
145
+ for i in range(0,len_word_list // par_len):
146
+ string_part = ' '.join(word_list[i*par_len:(i+1)*par_len])
147
+ par_list.append(string_part)
148
+
149
+ labels = classifier(par_list)
150
+ labels_= [(l['label'],l['score']) for l in labels]
151
+ df = DataFrame(labels_, columns=["SDG", "Relevancy"])
152
+ df['text'] = ['... '+par+' ...' for par in par_list]
153
+ df = df.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
154
+ df.index += 1
155
+ df =df[df['Relevancy']>.9]
156
+ x = df['SDG'].value_counts()
157
+
158
+ plt.rcParams['font.size'] = 25
159
+ colors = plt.get_cmap('Blues')(np.linspace(0.2, 0.7, len(x)))
160
+ # plot
161
+ fig, ax = plt.subplots()
162
+ ax.pie(x, colors=colors, radius=2, center=(4, 4),
163
+ wedgeprops={"linewidth": 1, "edgecolor": "white"}, frame=False,labels =list(x.index))
164
+
165
+ st.markdown("## 🎈 Anything related to SDGs?")
166
+
167
+ c4, c5, c6 = st.columns([5, 7, 1])
168
+
169
+ # Add styling
170
+ cmGreen = sns.light_palette("green", as_cmap=True)
171
+ cmRed = sns.light_palette("red", as_cmap=True)
172
+ df = df.style.background_gradient(
173
+ cmap=cmGreen,
174
+ subset=[
175
+ "Relevancy",
176
+ ],
177
+ )
178
+
179
+ format_dictionary = {
180
+ "Relevancy": "{:.1%}",
181
+ }
182
+
183
+ df = df.format(format_dictionary)
184
+
185
+ with c4:
186
+ st.pyplot(fig)
187
+ with c5:
188
+ st.table(df)