Spaces:

GIZ
/

cpu_tracs

Sleeping

App Files Community

ppsingh committed on Jul 18, 2023

Commit

28f514c

•

1 Parent(s): 5169b3e

target stats

Browse files

Files changed (1) hide show

appStore/target.py +54 -66

appStore/target.py CHANGED Viewed

@@ -59,19 +59,6 @@ def to_excel(df):
     return processed_data
 def app():
-    #### APP INFO #####
-    #     st.write(
-    #         """
-    #         The **Target Extraction** app is an easy-to-use interface built \
-    #             in Streamlit for analyzing policy documents for \
-    #              Classification of the paragraphs/texts in the document *If it \
-    #             contains any Economy-Wide Targets related information* - \
-    #             developed by GIZ Data Service Center, GFA, IKI Tracs, \
-    #              SV Klima and SPA. \n
-    #         """)
     ### Main app code ###
     with st.container():
         if 'key0' in st.session_state:
@@ -88,64 +75,47 @@ def app():
             df  = target_classification(haystack_doc=df,
                                     threshold= params['threshold'])
             st.session_state.key1 = df
-          # # excel part
-            # temp = df[df['Relevancy']>threshold]
-            # df['Validation'] =  'No'
-            # df_xlsx = to_excel(df)
-            # st.download_button(label='📥 Download Current Result',
-            #                 data=df_xlsx ,
-            #                 file_name= 'file_target.xlsx')
 def target_display():
     if  'key1' in st.session_state:
-        df = st.session_state.key1
         hits  = df[df['Target Label'] == 'TARGET']
-        st.table(hits)
-        # # hits['GHG Label'] = hits['GHG Label'].apply(lambda i: _lab_dict[i])
-        # range_val = min(5,len(hits))
-        # if range_val !=0:
-        #     count_target = sum(hits['Target Label'] == 'TARGET')
-        #     count_netzero = sum(hits['Netzero Label'] == 'NETZERO')
-        #     count_ghg = sum(hits['GHG Label'] == 'GHG')
-        #     count_economy = sum([True if 'Economy-wide' in x else False
-        #                       for x in hits['Sector Label']])
-        #     # count_df = df['Target Label'].value_counts()
-        #     # count_df = count_df.rename('count')
-        #     # count_df = count_df.rename_axis('Target Label').reset_index()
-        #     # count_df['Label_def'] = count_df['Target Label'].apply(lambda x: _lab_dict[x])
-        #     # fig = px.bar(count_df, y="Label_def", x="count", orientation='h', height=200)
-        #     c1, c2 = st.columns([1,1])
-        #     with c1:
-        #         st.write('**Target Paragraphs**: `{}`'.format(count_target))
-        #         st.write('**NetZero Related Paragraphs**: `{}`'.format(count_netzero))
-        #         # st.plotly_chart(fig,use_container_width= True)
-        #     # count_netzero = sum(hits['Netzero Label'] == 'NETZERO')
-        #     # count_ghg = sum(hits['GHG Label'] == 'LABEL_2')
-        #     # count_economy = sum([True if 'Economy-wide' in x else False
-        #     #                   for x in hits['Sector Label']])
-        #     with c2:
-        #         st.write('**GHG Related Paragraphs**: `{}`'.format(count_ghg))
-        #         st.write('**Economy-wide Related Paragraphs**: `{}`'.format(count_economy))
-        #     st.write('-------------------')
-        #     hits = hits.sort_values(by=['Relevancy'], ascending=False)
-        #     netzerohit = hits[hits['Netzero Label'] == 'NETZERO']
-        #     if not netzerohit.empty:
-        #         netzerohit = netzerohit.sort_values(by = ['Netzero Score'], ascending = False)
-        #         # st.write('-------------------')
-        #         # st.markdown("###### Netzero paragraph ######")
-        #         st.write('**Netzero paragraph** `page {}`: {}'.format(netzerohit.iloc[0]['page'],
-        #                         netzerohit.iloc[0]['text'].replace("\n", " ")))
-        #         st.write("")
-        #     else:
-        #         st.info("🤔 No Netzero paragraph found")
         #     # st.write("**Result {}** `page {}` (Relevancy Score: {:.2f})'".format(i+1,hits.iloc[i]['page'],hits.iloc[i]['Relevancy'])")
         #     st.write('-------------------')
@@ -173,4 +143,22 @@ def target_display():
         #                     file_name= os.path.splitext(st.session_state['filename'])[0]+'.xlsx')
         # else:
-        #     st.info("🤔 No Targets found")

     return processed_data
 def app():
     ### Main app code ###
     with st.container():
         if 'key0' in st.session_state:
             df  = target_classification(haystack_doc=df,
                                     threshold= params['threshold'])
             st.session_state.key1 = df
 def target_display():
     if  'key1' in st.session_state:
+        df = st.session_state.key1
+        st.info('**{}**  with ~{} pages is splitted into {} paragraphs/text chunks (page number is **True** only for pdf files)'\
+                      .format(os.path.basename(st.session_state['filename']),st.session_state['pages'], len(df)), icon="ℹ️")
+        st.caption("""Some Targets are an intention to achieve a specific result, \
+                    for example, to reduce GHG emissions to a specific level \
+                    (a GHG target) or increase energy efficiency or renewable \
+                    energy to a specific level (a non-GHG target), typically by \
+                    a certain date""")
         hits  = df[df['Target Label'] == 'TARGET']
+        range_val = min(5,len(hits))
+        if range_val !=0:
+            # collecting some statistics
+            count_target = sum(hits['Target Label'] == 'TARGET')
+            count_netzero = sum(hits['Netzero Label'] == 'NETZERO')
+            count_ghg = sum(hits['GHG Label'] == 'GHG')
+            count_economy = sum([True if 'Economy-wide' in x else False
+                              for x in hits['Sector Label']])
+            c1, c2 = st.columns([1,1])
+            with c1:
+                st.write('**Target Paragraphs**: `{}`'.format(count_target))
+                st.write('**NetZero Related Paragraphs**: `{}`'.format(count_netzero))
+            with c2:
+                st.write('**GHG Target Related Paragraphs**: `{}`'.format(count_ghg))
+                st.write('**Economy-wide Related Paragraphs**: `{}`'.format(count_economy))
+            st.write('-------------------')
+            hits = hits.sort_values(by=['Relevancy'], ascending=False)
+            netzerohit = hits[hits['Netzero Label'] == 'NETZERO']
+            if not netzerohit.empty:
+                netzerohit = netzerohit.sort_values(by = ['Netzero Score'], ascending = False)
+                # st.write('-------------------')
+                # st.markdown("###### Netzero paragraph ######")
+                st.write('**Netzero paragraph** `page {}`: {}'.format(netzerohit.iloc[0]['page'],
+                                netzerohit.iloc[0]['text'].replace("\n", " ")))
+                st.write("")
+            else:
+                st.info("🤔 No Netzero paragraph found")
         #     # st.write("**Result {}** `page {}` (Relevancy Score: {:.2f})'".format(i+1,hits.iloc[i]['page'],hits.iloc[i]['Relevancy'])")
         #     st.write('-------------------')
         #                     file_name= os.path.splitext(st.session_state['filename'])[0]+'.xlsx')
         # else:
+        #     st.info("🤔 No Targets found")
+            # count_df = df['Target Label'].value_counts()
+            # count_df = count_df.rename('count')
+            # count_df = count_df.rename_axis('Target Label').reset_index()
+            # count_df['Label_def'] = count_df['Target Label'].apply(lambda x: _lab_dict[x])
+                # st.plotly_chart(fig,use_container_width= True)
+            # count_netzero = sum(hits['Netzero Label'] == 'NETZERO')
+            # count_ghg = sum(hits['GHG Label'] == 'LABEL_2')
+            # count_economy = sum([True if 'Economy-wide' in x else False
+            #                   for x in hits['Sector Label']])
+          # # excel part
+            # temp = df[df['Relevancy']>threshold]
+            # df['Validation'] =  'No'
+            # df_xlsx = to_excel(df)
+            # st.download_button(label='📥 Download Current Result',
+            #                 data=df_xlsx ,
+            #                 file_name= 'file_target.xlsx')