Spaces:

GIZ
/

SDSN-demo

Runtime error

App Files Community

ppsingh committed on Sep 26, 2022

Commit

8c4c590

1 Parent(s): b0f3060

Adding Download to main branch

Browse files

Files changed (5) hide show

app.py +1 -0
appStore/coherence.py +37 -1
appStore/keyword_search.py +41 -3
appStore/sdg_analysis.py +99 -12
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import appStore.info as info
 from appStore.multiapp import MultiApp
 import streamlit as st
 st.set_page_config(f'SDSN x GIZ Policy Action Tracking v0.1', layout="wide")
 app = MultiApp()

 from appStore.multiapp import MultiApp
 import streamlit as st
+# This branch is before the download option was implemented
 st.set_page_config(f'SDSN x GIZ Policy Action Tracking v0.1', layout="wide")
 app = MultiApp()

appStore/coherence.py CHANGED Viewed

@@ -28,6 +28,11 @@ import sqlite3
 import json
 import urllib.request
 import ast
 def app():
     # Sidebar
     st.sidebar.title('Check Coherence')
@@ -222,10 +227,41 @@ def app():
                     #label_indices, paragraph_indices = np.where(similarity_matrix>similarity_high_threshold)
                     #positive_indices = list(zip(label_indices.tolist(), paragraph_indices.tolist()))
                     for _label_idx, _paragraph_idx in positive_indices:
                         st.write("This paragraph: \n")
                         st.write(paraList[_paragraph_idx])
                         st.write(f"Is relevant to: \n {list(sent_dict.keys())[_label_idx]}")
                         st.write('-'*10)

 import json
 import urllib.request
 import ast
+import docx
+from docx.shared import Inches
+from docx.shared import Pt
+from docx.enum.style import WD_STYLE_TYPE
 def app():
     # Sidebar
     st.sidebar.title('Check Coherence')
                     #label_indices, paragraph_indices = np.where(similarity_matrix>similarity_high_threshold)
                     #positive_indices = list(zip(label_indices.tolist(), paragraph_indices.tolist()))
+                    document = docx.Document()
+                    document.add_heading('Document name:{}'.format(file_name), 2)
+                    section = document.sections[0]
+                      # Calling the footer
+                    footer = section.footer
+                    # Calling the paragraph already present in
+                    # the footer section
+                    footer_para = footer.paragraphs[0]
+                    font_styles = document.styles
+                    font_charstyle = font_styles.add_style('CommentsStyle', WD_STYLE_TYPE.CHARACTER)
+                    font_object = font_charstyle.font
+                    font_object.size = Pt(7)
+                    # Adding the centered zoned footer
+                    footer_para.add_run('''\tPowered by GIZ Data and the Sustainable Development Solution Network hosted at Hugging-Face spaces:                        https://huggingface.co/spaces/ppsingh/streamlit_dev''', style='CommentsStyle')
+                    document.add_paragraph("Country Code for which NDC is carried out {}".format(countryCode))
                     for _label_idx, _paragraph_idx in positive_indices:
                         st.write("This paragraph: \n")
+                        document.add_paragraph("This paragraph: \n")
                         st.write(paraList[_paragraph_idx])
                         st.write(f"Is relevant to: \n {list(sent_dict.keys())[_label_idx]}")
+                        document.add_paragraph(f"Is relevant to: \n {list(sent_dict.keys())[_label_idx]}")
                         st.write('-'*10)
+                        document.add_paragraph('-'*10)
+                    document.save('demo.docx')
+                    with open("demo.docx", "rb") as file:
+                                btn = st.download_button(
+                                label="Download file",
+                                data=file,
+                                file_name="demo.docx",
+                                mime="txt/docx"
+                                  )

appStore/keyword_search.py CHANGED Viewed

@@ -20,6 +20,10 @@ from sklearn.feature_extraction import _stop_words
 import string
 from tqdm.autonotebook import tqdm
 import numpy as np
 import tempfile
 import sqlite3
@@ -100,8 +104,29 @@ def app():
                 return bm25_hits, hits
       def show_results(keywordList):
         for keyword in keywordList:
           st.write("Results for Query: {}".format(keyword))
           bm25_hits, hits = search(keyword)
           st.markdown("""
@@ -109,24 +134,36 @@ def app():
                       """)
           # In the semantic search part we provide two kinds of results: one from the Retriever (Bi-Encoder) only, and the other from the ReRanker (Cross-Encoder)
           st.markdown("Top few lexical search (BM25) hits")
           for hit in bm25_hits[0:5]:
               if hit['score'] > 0.00:
                   st.write("\t Score: {:.3f}:  \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
         #   st.table(bm25_hits[0:3])
           st.markdown("\n-------------------------\n")
           st.markdown("Top few Bi-Encoder Retrieval hits")
           hits = sorted(hits, key=lambda x: x['score'], reverse=True)
           for hit in hits[0:5]:
             #  if hit['score'] > 0.45:
               st.write("\t Score: {:.3f}:  \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
           #st.table(hits[0:3]
       @st.cache(allow_output_mutation=True)
@@ -206,6 +243,7 @@ def app():
           if st.button("Find them."):
             keywordList = [keyword]
           if keywordList is not None:
               show_results(keywordList)

 import string
 from tqdm.autonotebook import tqdm
 import numpy as np
+import docx
+from docx.shared import Inches
+from docx.shared import Pt
+from docx.enum.style import WD_STYLE_TYPE
 import tempfile
 import sqlite3
                 return bm25_hits, hits
       def show_results(keywordList):
+        document = docx.Document()
+        document.add_heading('Document name:{}'.format(file_name), 2)
+        section = document.sections[0]
+          # Calling the footer
+        footer = section.footer
+        # Calling the paragraph already present in
+        # the footer section
+        footer_para = footer.paragraphs[0]
+        font_styles = document.styles
+        font_charstyle = font_styles.add_style('CommentsStyle', WD_STYLE_TYPE.CHARACTER)
+        font_object = font_charstyle.font
+        font_object.size = Pt(7)
+        # Adding the centered zoned footer
+        footer_para.add_run('''\tPowered by GIZ Data and the Sustainable Development Solution Network hosted at Hugging-Face spaces: https://huggingface.co/spaces/ppsingh/streamlit_dev''', style='CommentsStyle')
+        document.add_heading('Your Seacrhed for {}'.format(keywordList), level=1)
         for keyword in keywordList:
           st.write("Results for Query: {}".format(keyword))
+          para = document.add_paragraph().add_run("Results for Query: {}".format(keyword))
+          para.font.size = Pt(12)
           bm25_hits, hits = search(keyword)
           st.markdown("""
                       """)
           # In the semantic search part we provide two kinds of results: one from the Retriever (Bi-Encoder) only, and the other from the ReRanker (Cross-Encoder)
           st.markdown("Top few lexical search (BM25) hits")
+          document.add_paragraph("Top few lexical search (BM25) hits")
           for hit in bm25_hits[0:5]:
               if hit['score'] > 0.00:
                   st.write("\t Score: {:.3f}:  \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
+                  document.add_paragraph("\t Score: {:.3f}:  \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
         #   st.table(bm25_hits[0:3])
           st.markdown("\n-------------------------\n")
           st.markdown("Top few Bi-Encoder Retrieval hits")
+          document.add_paragraph("\n-------------------------\n")
+          document.add_paragraph("Top few Bi-Encoder Retrieval hits")
           hits = sorted(hits, key=lambda x: x['score'], reverse=True)
           for hit in hits[0:5]:
             #  if hit['score'] > 0.45:
               st.write("\t Score: {:.3f}:  \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
+              document.add_paragraph("\t Score: {:.3f}:  \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
           #st.table(hits[0:3]
+        document.save('demo.docx')
+        with open("demo.docx", "rb") as file:
+                     btn = st.download_button(
+                     label="Download file",
+                     data=file,
+                     file_name="demo.docx",
+                     mime="txt/docx"
+                       )
       @st.cache(allow_output_mutation=True)
           if st.button("Find them."):
             keywordList = [keyword]
           if keywordList is not None:
               show_results(keywordList)

appStore/sdg_analysis.py CHANGED Viewed

@@ -13,7 +13,11 @@ from transformers import pipeline
 import matplotlib.pyplot as plt
 import numpy as np
 import streamlit as st
-import pandas as pd
 import tempfile
 import sqlite3
@@ -111,7 +115,11 @@ def app():
                     .sort_values(by="Relevancy", ascending=False)
                     .reset_index(drop=True)
                 )
                 df.index += 1
                 # Add styling
@@ -162,12 +170,13 @@ def app():
                 labels = classifier(par_list)
                 labels_= [(l['label'],l['score']) for l in labels]
-                df = DataFrame(labels_, columns=["SDG", "Relevancy"])
-                df['text'] = par_list
-                df = df.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
-                df.index += 1
-                df =df[df['Relevancy']>.85]
-                x = df['SDG'].value_counts()
                 plt.rcParams['font.size'] = 25
                 colors = plt.get_cmap('Blues')(np.linspace(0.2, 0.7, len(x)))
@@ -175,7 +184,7 @@ def app():
                 fig, ax = plt.subplots()
                 ax.pie(x, colors=colors, radius=2, center=(4, 4),
                     wedgeprops={"linewidth": 1, "edgecolor": "white"}, frame=False,labels =list(x.index))
                 st.markdown("## 🎈 Anything related to SDGs?")
                 c4, c5, c6 = st.columns([2, 2, 2])
@@ -183,7 +192,7 @@ def app():
                 # Add styling
                 cmGreen = sns.light_palette("green", as_cmap=True)
                 cmRed = sns.light_palette("red", as_cmap=True)
-                df = df.style.background_gradient(
                     cmap=cmGreen,
                     subset=[
                         "Relevancy",
@@ -194,13 +203,91 @@ def app():
                     "Relevancy": "{:.1%}",
                 }
-                df = df.format(format_dictionary)
                 with c5:
                     st.pyplot(fig)
                 c7, c8, c9 = st.columns([1, 10, 1])
                 with c8:
-                    st.table(df)

 import matplotlib.pyplot as plt
 import numpy as np
 import streamlit as st
+import pandas as pd
+import docx
+from docx.shared import Inches
+from docx.shared import Pt
+from docx.enum.style import WD_STYLE_TYPE
 import tempfile
 import sqlite3
                     .sort_values(by="Relevancy", ascending=False)
                     .reset_index(drop=True)
                 )
+                df1 = (
+                    DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
+                    .sort_values(by="Relevancy", ascending=False)
+                    .reset_index(drop=True)
+                )
                 df.index += 1
                 # Add styling
                 labels = classifier(par_list)
                 labels_= [(l['label'],l['score']) for l in labels]
+                df2 = DataFrame(labels_, columns=["SDG", "Relevancy"])
+                df2['text'] = par_list
+                df2 = df2.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
+                df2.index += 1
+                df2 =df2[df2['Relevancy']>.85]
+                x = df2['SDG'].value_counts()
+                df3 = df2.copy()
                 plt.rcParams['font.size'] = 25
                 colors = plt.get_cmap('Blues')(np.linspace(0.2, 0.7, len(x)))
                 fig, ax = plt.subplots()
                 ax.pie(x, colors=colors, radius=2, center=(4, 4),
                     wedgeprops={"linewidth": 1, "edgecolor": "white"}, frame=False,labels =list(x.index))
+                fig.savefig('temp.png', bbox_inches='tight',dpi= 100)
                 st.markdown("## 🎈 Anything related to SDGs?")
                 c4, c5, c6 = st.columns([2, 2, 2])
                 # Add styling
                 cmGreen = sns.light_palette("green", as_cmap=True)
                 cmRed = sns.light_palette("red", as_cmap=True)
+                df2 = df2.style.background_gradient(
                     cmap=cmGreen,
                     subset=[
                         "Relevancy",
                     "Relevancy": "{:.1%}",
                 }
+                df2 = df2.format(format_dictionary)
                 with c5:
                     st.pyplot(fig)
                 c7, c8, c9 = st.columns([1, 10, 1])
                 with c8:
+                    st.table(df2)
+                document = docx.Document()
+                document.add_heading('Document name:{}'.format(file_name), 2)
+                # Choosing the top most section of the page
+                section = document.sections[0]
+                # Calling the footer
+                footer = section.footer
+                # Calling the paragraph already present in
+                # the footer section
+                footer_para = footer.paragraphs[0]
+                font_styles = document.styles
+                font_charstyle = font_styles.add_style('CommentsStyle', WD_STYLE_TYPE.CHARACTER)
+                font_object = font_charstyle.font
+                font_object.size = Pt(7)
+                # Adding the centered zoned footer
+                footer_para.add_run('''\tPowered by GIZ Data and the Sustainable Development Solution Network hosted at Hugging-Face spaces: https://huggingface.co/spaces/ppsingh/streamlit_dev''', style='CommentsStyle')
+                #footer_para.text = "\tPowered by GIZ Data and the Sustainable Development Solution Network\
+                 #                     hosted at Hugging-Face spaces: https://huggingface.co/spaces/ppsingh/streamlit_dev"
+                #footer_para.font.size = docx.shared.Pt(6)
+                document.add_heading('What is the document about', level=1)
+                t = document.add_table(df1.shape[0]+1, df1.shape[1])
+                # add the header rows.
+                for j in range(df1.shape[-1]):
+                    t.cell(0,j).text = df1.columns[j]
+                # add the rest of the data frame
+                for i in range(df1.shape[0]):
+                    for j in range(df1.shape[-1]):
+                        t.cell(i+1,j).text = str(df1.values[i,j])
+                document.add_heading('Anything Related to SDG', level=1)
+                document.add_picture('temp.png', width=Inches(3), height=Inches(3))
+                t = document.add_table(df3.shape[0]+1, df3.shape[1])
+                widths = [Inches(0.4), Inches(0.4), Inches(4.5)]
+                # add the header rows.
+                for j in range(df3.shape[-1]):
+                    t.cell(0,j).text = df3.columns[j]
+                    t.cell(0,j).width = widths[j]
+                # add the rest of the data frame
+                for i in range(df3.shape[0]):
+                    for j in range(df3.shape[-1]):
+                        t.cell(i+1,j).width = widths[j]
+                        t.cell(i+1,j).text = str(df3.values[i,j])
+                document.save('demo.docx')
+                #with open('summary.txt', 'w') as f:
+                 #   f.write(df1.to_string())
+                 #   f.write(fig)
+                    #f.write(df2)
+                    # f.write(df3.to_string())
+                with open("demo.docx", "rb") as file:
+                     btn = st.download_button(
+                     label="Download file",
+                     data=file,
+                     file_name="demo.docx",
+                     mime="txt/docx"
+                       )
+                #with document st.download_button(
+                 #  label="Download data as docx",
+                  # data=document,
+                   #file_name='test.docx',
+                   #mime='text/docx',
+                    # )

requirements.txt CHANGED Viewed

@@ -11,4 +11,5 @@ pdfplumber==0.6.2
 Pillow==9.1.1
 seaborn==0.11.2
 transformers==4.13.0
-rank_bm25

 Pillow==9.1.1
 seaborn==0.11.2
 transformers==4.13.0
+rank_bm25
+python-docx