Adding Download to main branch
Browse files- app.py +1 -0
- appStore/coherence.py +37 -1
- appStore/keyword_search.py +41 -3
- appStore/sdg_analysis.py +99 -12
- requirements.txt +2 -1
app.py
CHANGED
@@ -5,6 +5,7 @@ import appStore.info as info
|
|
5 |
from appStore.multiapp import MultiApp
|
6 |
import streamlit as st
|
7 |
|
|
|
8 |
st.set_page_config(f'SDSN x GIZ Policy Action Tracking v0.1', layout="wide")
|
9 |
|
10 |
app = MultiApp()
|
|
|
5 |
from appStore.multiapp import MultiApp
|
6 |
import streamlit as st
|
7 |
|
8 |
+
# This branch is before the download option was implemented
|
9 |
st.set_page_config(f'SDSN x GIZ Policy Action Tracking v0.1', layout="wide")
|
10 |
|
11 |
app = MultiApp()
|
appStore/coherence.py
CHANGED
@@ -28,6 +28,11 @@ import sqlite3
|
|
28 |
import json
|
29 |
import urllib.request
|
30 |
import ast
|
|
|
|
|
|
|
|
|
|
|
31 |
def app():
|
32 |
# Sidebar
|
33 |
st.sidebar.title('Check Coherence')
|
@@ -222,10 +227,41 @@ def app():
|
|
222 |
#label_indices, paragraph_indices = np.where(similarity_matrix>similarity_high_threshold)
|
223 |
|
224 |
#positive_indices = list(zip(label_indices.tolist(), paragraph_indices.tolist()))
|
225 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
226 |
for _label_idx, _paragraph_idx in positive_indices:
|
227 |
st.write("This paragraph: \n")
|
|
|
228 |
st.write(paraList[_paragraph_idx])
|
229 |
st.write(f"Is relevant to: \n {list(sent_dict.keys())[_label_idx]}")
|
|
|
230 |
st.write('-'*10)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
|
|
|
28 |
import json
|
29 |
import urllib.request
|
30 |
import ast
|
31 |
+
import docx
|
32 |
+
from docx.shared import Inches
|
33 |
+
from docx.shared import Pt
|
34 |
+
from docx.enum.style import WD_STYLE_TYPE
|
35 |
+
|
36 |
def app():
|
37 |
# Sidebar
|
38 |
st.sidebar.title('Check Coherence')
|
|
|
227 |
#label_indices, paragraph_indices = np.where(similarity_matrix>similarity_high_threshold)
|
228 |
|
229 |
#positive_indices = list(zip(label_indices.tolist(), paragraph_indices.tolist()))
|
230 |
+
document = docx.Document()
|
231 |
+
document.add_heading('Document name:{}'.format(file_name), 2)
|
232 |
+
section = document.sections[0]
|
233 |
+
|
234 |
+
# Get the section's footer
|
235 |
+
footer = section.footer
|
236 |
+
|
237 |
+
# Get the paragraph already present in
|
238 |
+
# the footer section
|
239 |
+
footer_para = footer.paragraphs[0]
|
240 |
+
|
241 |
+
font_styles = document.styles
|
242 |
+
font_charstyle = font_styles.add_style('CommentsStyle', WD_STYLE_TYPE.CHARACTER)
|
243 |
+
font_object = font_charstyle.font
|
244 |
+
font_object.size = Pt(7)
|
245 |
+
# Add the footer text to the centered zone (reached via the leading tab)
|
246 |
+
footer_para.add_run('''\tPowered by GIZ Data and the Sustainable Development Solution Network hosted at Hugging-Face spaces: https://huggingface.co/spaces/ppsingh/streamlit_dev''', style='CommentsStyle')
|
247 |
+
|
248 |
+
document.add_paragraph("Country Code for which NDC is carried out {}".format(countryCode))
|
249 |
+
|
250 |
for _label_idx, _paragraph_idx in positive_indices:
|
251 |
st.write("This paragraph: \n")
|
252 |
+
document.add_paragraph("This paragraph: \n")
|
253 |
st.write(paraList[_paragraph_idx])
|
254 |
st.write(f"Is relevant to: \n {list(sent_dict.keys())[_label_idx]}")
|
255 |
+
document.add_paragraph(f"Is relevant to: \n {list(sent_dict.keys())[_label_idx]}")
|
256 |
st.write('-'*10)
|
257 |
+
document.add_paragraph('-'*10)
|
258 |
+
|
259 |
+
document.save('demo.docx')
|
260 |
+
with open("demo.docx", "rb") as file:
|
261 |
+
btn = st.download_button(
|
262 |
+
label="Download file",
|
263 |
+
data=file,
|
264 |
+
file_name="demo.docx",
|
265 |
+
mime="txt/docx"
|
266 |
+
)
|
267 |
|
appStore/keyword_search.py
CHANGED
@@ -20,6 +20,10 @@ from sklearn.feature_extraction import _stop_words
|
|
20 |
import string
|
21 |
from tqdm.autonotebook import tqdm
|
22 |
import numpy as np
|
|
|
|
|
|
|
|
|
23 |
|
24 |
import tempfile
|
25 |
import sqlite3
|
@@ -100,8 +104,29 @@ def app():
|
|
100 |
return bm25_hits, hits
|
101 |
|
102 |
def show_results(keywordList):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
for keyword in keywordList:
|
|
|
104 |
st.write("Results for Query: {}".format(keyword))
|
|
|
|
|
105 |
bm25_hits, hits = search(keyword)
|
106 |
|
107 |
st.markdown("""
|
@@ -109,24 +134,36 @@ def app():
|
|
109 |
""")
|
110 |
# In the semantic search part we provide two kinds of results: one with only the Retriever (Bi-Encoder) and the other with the ReRanker (Cross-Encoder)
|
111 |
st.markdown("Top few lexical search (BM25) hits")
|
|
|
|
|
112 |
for hit in bm25_hits[0:5]:
|
113 |
if hit['score'] > 0.00:
|
114 |
st.write("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
|
|
|
115 |
|
116 |
-
|
117 |
-
|
118 |
|
119 |
|
120 |
# st.table(bm25_hits[0:3])
|
121 |
|
122 |
st.markdown("\n-------------------------\n")
|
123 |
st.markdown("Top few Bi-Encoder Retrieval hits")
|
124 |
-
|
|
|
|
|
125 |
hits = sorted(hits, key=lambda x: x['score'], reverse=True)
|
126 |
for hit in hits[0:5]:
|
127 |
# if hit['score'] > 0.45:
|
128 |
st.write("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
|
|
|
129 |
#st.table(hits[0:3]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
|
131 |
|
132 |
@st.cache(allow_output_mutation=True)
|
@@ -206,6 +243,7 @@ def app():
|
|
206 |
if st.button("Find them."):
|
207 |
keywordList = [keyword]
|
208 |
if keywordList is not None:
|
|
|
209 |
show_results(keywordList)
|
210 |
|
211 |
|
|
|
20 |
import string
|
21 |
from tqdm.autonotebook import tqdm
|
22 |
import numpy as np
|
23 |
+
import docx
|
24 |
+
from docx.shared import Inches
|
25 |
+
from docx.shared import Pt
|
26 |
+
from docx.enum.style import WD_STYLE_TYPE
|
27 |
|
28 |
import tempfile
|
29 |
import sqlite3
|
|
|
104 |
return bm25_hits, hits
|
105 |
|
106 |
def show_results(keywordList):
|
107 |
+
document = docx.Document()
|
108 |
+
document.add_heading('Document name:{}'.format(file_name), 2)
|
109 |
+
section = document.sections[0]
|
110 |
+
|
111 |
+
# Get the section's footer
|
112 |
+
footer = section.footer
|
113 |
+
|
114 |
+
# Get the paragraph already present in
|
115 |
+
# the footer section
|
116 |
+
footer_para = footer.paragraphs[0]
|
117 |
+
|
118 |
+
font_styles = document.styles
|
119 |
+
font_charstyle = font_styles.add_style('CommentsStyle', WD_STYLE_TYPE.CHARACTER)
|
120 |
+
font_object = font_charstyle.font
|
121 |
+
font_object.size = Pt(7)
|
122 |
+
# Add the footer text to the centered zone (reached via the leading tab)
|
123 |
+
footer_para.add_run('''\tPowered by GIZ Data and the Sustainable Development Solution Network hosted at Hugging-Face spaces: https://huggingface.co/spaces/ppsingh/streamlit_dev''', style='CommentsStyle')
|
124 |
+
document.add_heading('Your Seacrhed for {}'.format(keywordList), level=1)
|
125 |
for keyword in keywordList:
|
126 |
+
|
127 |
st.write("Results for Query: {}".format(keyword))
|
128 |
+
para = document.add_paragraph().add_run("Results for Query: {}".format(keyword))
|
129 |
+
para.font.size = Pt(12)
|
130 |
bm25_hits, hits = search(keyword)
|
131 |
|
132 |
st.markdown("""
|
|
|
134 |
""")
|
135 |
# In the semantic search part we provide two kinds of results: one with only the Retriever (Bi-Encoder) and the other with the ReRanker (Cross-Encoder)
|
136 |
st.markdown("Top few lexical search (BM25) hits")
|
137 |
+
document.add_paragraph("Top few lexical search (BM25) hits")
|
138 |
+
|
139 |
for hit in bm25_hits[0:5]:
|
140 |
if hit['score'] > 0.00:
|
141 |
st.write("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
|
142 |
+
document.add_paragraph("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
|
143 |
|
|
|
|
|
144 |
|
145 |
|
146 |
# st.table(bm25_hits[0:3])
|
147 |
|
148 |
st.markdown("\n-------------------------\n")
|
149 |
st.markdown("Top few Bi-Encoder Retrieval hits")
|
150 |
+
document.add_paragraph("\n-------------------------\n")
|
151 |
+
document.add_paragraph("Top few Bi-Encoder Retrieval hits")
|
152 |
+
|
153 |
hits = sorted(hits, key=lambda x: x['score'], reverse=True)
|
154 |
for hit in hits[0:5]:
|
155 |
# if hit['score'] > 0.45:
|
156 |
st.write("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
|
157 |
+
document.add_paragraph("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
|
158 |
#st.table(hits[0:3]
|
159 |
+
document.save('demo.docx')
|
160 |
+
with open("demo.docx", "rb") as file:
|
161 |
+
btn = st.download_button(
|
162 |
+
label="Download file",
|
163 |
+
data=file,
|
164 |
+
file_name="demo.docx",
|
165 |
+
mime="txt/docx"
|
166 |
+
)
|
167 |
|
168 |
|
169 |
@st.cache(allow_output_mutation=True)
|
|
|
243 |
if st.button("Find them."):
|
244 |
keywordList = [keyword]
|
245 |
if keywordList is not None:
|
246 |
+
|
247 |
show_results(keywordList)
|
248 |
|
249 |
|
appStore/sdg_analysis.py
CHANGED
@@ -13,7 +13,11 @@ from transformers import pipeline
|
|
13 |
import matplotlib.pyplot as plt
|
14 |
import numpy as np
|
15 |
import streamlit as st
|
16 |
-
import pandas as pd
|
|
|
|
|
|
|
|
|
17 |
|
18 |
import tempfile
|
19 |
import sqlite3
|
@@ -111,7 +115,11 @@ def app():
|
|
111 |
.sort_values(by="Relevancy", ascending=False)
|
112 |
.reset_index(drop=True)
|
113 |
)
|
114 |
-
|
|
|
|
|
|
|
|
|
115 |
df.index += 1
|
116 |
|
117 |
# Add styling
|
@@ -162,12 +170,13 @@ def app():
|
|
162 |
|
163 |
labels = classifier(par_list)
|
164 |
labels_= [(l['label'],l['score']) for l in labels]
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
x =
|
|
|
171 |
|
172 |
plt.rcParams['font.size'] = 25
|
173 |
colors = plt.get_cmap('Blues')(np.linspace(0.2, 0.7, len(x)))
|
@@ -175,7 +184,7 @@ def app():
|
|
175 |
fig, ax = plt.subplots()
|
176 |
ax.pie(x, colors=colors, radius=2, center=(4, 4),
|
177 |
wedgeprops={"linewidth": 1, "edgecolor": "white"}, frame=False,labels =list(x.index))
|
178 |
-
|
179 |
st.markdown("## 🎈 Anything related to SDGs?")
|
180 |
|
181 |
c4, c5, c6 = st.columns([2, 2, 2])
|
@@ -183,7 +192,7 @@ def app():
|
|
183 |
# Add styling
|
184 |
cmGreen = sns.light_palette("green", as_cmap=True)
|
185 |
cmRed = sns.light_palette("red", as_cmap=True)
|
186 |
-
|
187 |
cmap=cmGreen,
|
188 |
subset=[
|
189 |
"Relevancy",
|
@@ -194,13 +203,91 @@ def app():
|
|
194 |
"Relevancy": "{:.1%}",
|
195 |
}
|
196 |
|
197 |
-
|
198 |
|
199 |
with c5:
|
200 |
st.pyplot(fig)
|
201 |
|
202 |
c7, c8, c9 = st.columns([1, 10, 1])
|
203 |
with c8:
|
204 |
-
st.table(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
|
206 |
|
|
|
13 |
import matplotlib.pyplot as plt
|
14 |
import numpy as np
|
15 |
import streamlit as st
|
16 |
+
import pandas as pd
|
17 |
+
import docx
|
18 |
+
from docx.shared import Inches
|
19 |
+
from docx.shared import Pt
|
20 |
+
from docx.enum.style import WD_STYLE_TYPE
|
21 |
|
22 |
import tempfile
|
23 |
import sqlite3
|
|
|
115 |
.sort_values(by="Relevancy", ascending=False)
|
116 |
.reset_index(drop=True)
|
117 |
)
|
118 |
+
df1 = (
|
119 |
+
DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
|
120 |
+
.sort_values(by="Relevancy", ascending=False)
|
121 |
+
.reset_index(drop=True)
|
122 |
+
)
|
123 |
df.index += 1
|
124 |
|
125 |
# Add styling
|
|
|
170 |
|
171 |
labels = classifier(par_list)
|
172 |
labels_= [(l['label'],l['score']) for l in labels]
|
173 |
+
df2 = DataFrame(labels_, columns=["SDG", "Relevancy"])
|
174 |
+
df2['text'] = par_list
|
175 |
+
df2 = df2.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
|
176 |
+
df2.index += 1
|
177 |
+
df2 =df2[df2['Relevancy']>.85]
|
178 |
+
x = df2['SDG'].value_counts()
|
179 |
+
df3 = df2.copy()
|
180 |
|
181 |
plt.rcParams['font.size'] = 25
|
182 |
colors = plt.get_cmap('Blues')(np.linspace(0.2, 0.7, len(x)))
|
|
|
184 |
fig, ax = plt.subplots()
|
185 |
ax.pie(x, colors=colors, radius=2, center=(4, 4),
|
186 |
wedgeprops={"linewidth": 1, "edgecolor": "white"}, frame=False,labels =list(x.index))
|
187 |
+
fig.savefig('temp.png', bbox_inches='tight',dpi= 100)
|
188 |
st.markdown("## 🎈 Anything related to SDGs?")
|
189 |
|
190 |
c4, c5, c6 = st.columns([2, 2, 2])
|
|
|
192 |
# Add styling
|
193 |
cmGreen = sns.light_palette("green", as_cmap=True)
|
194 |
cmRed = sns.light_palette("red", as_cmap=True)
|
195 |
+
df2 = df2.style.background_gradient(
|
196 |
cmap=cmGreen,
|
197 |
subset=[
|
198 |
"Relevancy",
|
|
|
203 |
"Relevancy": "{:.1%}",
|
204 |
}
|
205 |
|
206 |
+
df2 = df2.format(format_dictionary)
|
207 |
|
208 |
with c5:
|
209 |
st.pyplot(fig)
|
210 |
|
211 |
c7, c8, c9 = st.columns([1, 10, 1])
|
212 |
with c8:
|
213 |
+
st.table(df2)
|
214 |
+
|
215 |
+
document = docx.Document()
|
216 |
+
document.add_heading('Document name:{}'.format(file_name), 2)
|
217 |
+
# Use the first section of the document
|
218 |
+
section = document.sections[0]
|
219 |
+
|
220 |
+
# Get the section's footer
|
221 |
+
footer = section.footer
|
222 |
+
|
223 |
+
# Get the paragraph already present in
|
224 |
+
# the footer section
|
225 |
+
footer_para = footer.paragraphs[0]
|
226 |
+
|
227 |
+
font_styles = document.styles
|
228 |
+
font_charstyle = font_styles.add_style('CommentsStyle', WD_STYLE_TYPE.CHARACTER)
|
229 |
+
font_object = font_charstyle.font
|
230 |
+
font_object.size = Pt(7)
|
231 |
+
# Add the footer text to the centered zone (reached via the leading tab)
|
232 |
+
footer_para.add_run('''\tPowered by GIZ Data and the Sustainable Development Solution Network hosted at Hugging-Face spaces: https://huggingface.co/spaces/ppsingh/streamlit_dev''', style='CommentsStyle')
|
233 |
+
|
234 |
+
#footer_para.text = "\tPowered by GIZ Data and the Sustainable Development Solution Network\
|
235 |
+
# hosted at Hugging-Face spaces: https://huggingface.co/spaces/ppsingh/streamlit_dev"
|
236 |
+
#footer_para.font.size = docx.shared.Pt(6)
|
237 |
+
|
238 |
+
document.add_heading('What is the document about', level=1)
|
239 |
+
t = document.add_table(df1.shape[0]+1, df1.shape[1])
|
240 |
+
|
241 |
+
|
242 |
+
# add the header rows.
|
243 |
+
for j in range(df1.shape[-1]):
|
244 |
+
t.cell(0,j).text = df1.columns[j]
|
245 |
+
|
246 |
+
|
247 |
+
# add the rest of the data frame
|
248 |
+
for i in range(df1.shape[0]):
|
249 |
+
for j in range(df1.shape[-1]):
|
250 |
+
t.cell(i+1,j).text = str(df1.values[i,j])
|
251 |
+
|
252 |
+
|
253 |
+
|
254 |
+
document.add_heading('Anything Related to SDG', level=1)
|
255 |
+
document.add_picture('temp.png', width=Inches(3), height=Inches(3))
|
256 |
+
t = document.add_table(df3.shape[0]+1, df3.shape[1])
|
257 |
+
|
258 |
+
widths = [Inches(0.4), Inches(0.4), Inches(4.5)]
|
259 |
+
# add the header rows.
|
260 |
+
for j in range(df3.shape[-1]):
|
261 |
+
t.cell(0,j).text = df3.columns[j]
|
262 |
+
t.cell(0,j).width = widths[j]
|
263 |
+
|
264 |
+
# add the rest of the data frame
|
265 |
+
for i in range(df3.shape[0]):
|
266 |
+
for j in range(df3.shape[-1]):
|
267 |
+
t.cell(i+1,j).width = widths[j]
|
268 |
+
t.cell(i+1,j).text = str(df3.values[i,j])
|
269 |
+
|
270 |
+
|
271 |
+
document.save('demo.docx')
|
272 |
+
|
273 |
+
#with open('summary.txt', 'w') as f:
|
274 |
+
# f.write(df1.to_string())
|
275 |
+
# f.write(fig)
|
276 |
+
#f.write(df2)
|
277 |
+
# f.write(df3.to_string())
|
278 |
+
|
279 |
+
with open("demo.docx", "rb") as file:
|
280 |
+
btn = st.download_button(
|
281 |
+
label="Download file",
|
282 |
+
data=file,
|
283 |
+
file_name="demo.docx",
|
284 |
+
mime="txt/docx"
|
285 |
+
)
|
286 |
+
#with document st.download_button(
|
287 |
+
# label="Download data as docx",
|
288 |
+
# data=document,
|
289 |
+
#file_name='test.docx',
|
290 |
+
#mime='text/docx',
|
291 |
+
# )
|
292 |
|
293 |
|
requirements.txt
CHANGED
@@ -11,4 +11,5 @@ pdfplumber==0.6.2
|
|
11 |
Pillow==9.1.1
|
12 |
seaborn==0.11.2
|
13 |
transformers==4.13.0
|
14 |
-
rank_bm25
|
|
|
|
11 |
Pillow==9.1.1
|
12 |
seaborn==0.11.2
|
13 |
transformers==4.13.0
|
14 |
+
rank_bm25
|
15 |
+
python-docx
|