ppsingh committed on
Commit
28f514c
1 Parent(s): 5169b3e

target stats

Browse files
Files changed (1) hide show
  1. appStore/target.py +54 -66
appStore/target.py CHANGED
@@ -59,19 +59,6 @@ def to_excel(df):
59
  return processed_data
60
 
61
  def app():
62
-
63
- #### APP INFO #####
64
- # st.write(
65
- # """
66
- # The **Target Extraction** app is an easy-to-use interface built \
67
- # in Streamlit for analyzing policy documents for \
68
- # Classification of the paragraphs/texts in the document *If it \
69
- # contains any Economy-Wide Targets related information* - \
70
- # developed by GIZ Data Service Center, GFA, IKI Tracs, \
71
- # SV Klima and SPA. \n
72
- # """)
73
-
74
-
75
  ### Main app code ###
76
  with st.container():
77
  if 'key0' in st.session_state:
@@ -88,64 +75,47 @@ def app():
88
  df = target_classification(haystack_doc=df,
89
  threshold= params['threshold'])
90
  st.session_state.key1 = df
91
-
92
- # # excel part
93
- # temp = df[df['Relevancy']>threshold]
94
-
95
- # df['Validation'] = 'No'
96
- # df_xlsx = to_excel(df)
97
- # st.download_button(label='📥 Download Current Result',
98
- # data=df_xlsx ,
99
- # file_name= 'file_target.xlsx')
100
 
101
  def target_display():
102
  if 'key1' in st.session_state:
103
- df = st.session_state.key1
104
-
 
105
 
 
 
 
 
 
106
  hits = df[df['Target Label'] == 'TARGET']
107
- st.table(hits)
108
- # # hits['GHG Label'] = hits['GHG Label'].apply(lambda i: _lab_dict[i])
109
- # range_val = min(5,len(hits))
110
- # if range_val !=0:
111
- # count_target = sum(hits['Target Label'] == 'TARGET')
112
- # count_netzero = sum(hits['Netzero Label'] == 'NETZERO')
113
- # count_ghg = sum(hits['GHG Label'] == 'GHG')
114
- # count_economy = sum([True if 'Economy-wide' in x else False
115
- # for x in hits['Sector Label']])
116
-
117
- # # count_df = df['Target Label'].value_counts()
118
- # # count_df = count_df.rename('count')
119
- # # count_df = count_df.rename_axis('Target Label').reset_index()
120
- # # count_df['Label_def'] = count_df['Target Label'].apply(lambda x: _lab_dict[x])
121
-
122
- # # fig = px.bar(count_df, y="Label_def", x="count", orientation='h', height=200)
123
- # c1, c2 = st.columns([1,1])
124
- # with c1:
125
- # st.write('**Target Paragraphs**: `{}`'.format(count_target))
126
- # st.write('**NetZero Related Paragraphs**: `{}`'.format(count_netzero))
127
 
128
- # # st.plotly_chart(fig,use_container_width= True)
129
-
130
- # # count_netzero = sum(hits['Netzero Label'] == 'NETZERO')
131
- # # count_ghg = sum(hits['GHG Label'] == 'LABEL_2')
132
- # # count_economy = sum([True if 'Economy-wide' in x else False
133
- # # for x in hits['Sector Label']])
134
- # with c2:
135
- # st.write('**GHG Related Paragraphs**: `{}`'.format(count_ghg))
136
- # st.write('**Economy-wide Related Paragraphs**: `{}`'.format(count_economy))
137
- # st.write('-------------------')
138
- # hits = hits.sort_values(by=['Relevancy'], ascending=False)
139
- # netzerohit = hits[hits['Netzero Label'] == 'NETZERO']
140
- # if not netzerohit.empty:
141
- # netzerohit = netzerohit.sort_values(by = ['Netzero Score'], ascending = False)
142
- # # st.write('-------------------')
143
- # # st.markdown("###### Netzero paragraph ######")
144
- # st.write('**Netzero paragraph** `page {}`: {}'.format(netzerohit.iloc[0]['page'],
145
- # netzerohit.iloc[0]['text'].replace("\n", " ")))
146
- # st.write("")
147
- # else:
148
- # st.info("🤔 No Netzero paragraph found")
149
 
150
  # # st.write("**Result {}** `page {}` (Relevancy Score: {:.2f})'".format(i+1,hits.iloc[i]['page'],hits.iloc[i]['Relevancy'])")
151
  # st.write('-------------------')
@@ -173,4 +143,22 @@ def target_display():
173
  # file_name= os.path.splitext(st.session_state['filename'])[0]+'.xlsx')
174
 
175
  # else:
176
- # st.info("🤔 No Targets found")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  return processed_data
60
 
61
  def app():
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  ### Main app code ###
63
  with st.container():
64
  if 'key0' in st.session_state:
 
75
  df = target_classification(haystack_doc=df,
76
  threshold= params['threshold'])
77
  st.session_state.key1 = df
 
 
 
 
 
 
 
 
 
78
 
79
  def target_display():
80
  if 'key1' in st.session_state:
81
+ df = st.session_state.key1
82
+ st.info('**{}** with ~{} pages is splitted into {} paragraphs/text chunks (page number is **True** only for pdf files)'\
83
+ .format(os.path.basename(st.session_state['filename']),st.session_state['pages'], len(df)), icon="ℹ️")
84
 
85
+ st.caption("""Some Targets are an intention to achieve a specific result, \
86
+ for example, to reduce GHG emissions to a specific level \
87
+ (a GHG target) or increase energy efficiency or renewable \
88
+ energy to a specific level (a non-GHG target), typically by \
89
+ a certain date""")
90
  hits = df[df['Target Label'] == 'TARGET']
91
+ range_val = min(5,len(hits))
92
+ if range_val !=0:
93
+ # collecting some statistics
94
+ count_target = sum(hits['Target Label'] == 'TARGET')
95
+ count_netzero = sum(hits['Netzero Label'] == 'NETZERO')
96
+ count_ghg = sum(hits['GHG Label'] == 'GHG')
97
+ count_economy = sum([True if 'Economy-wide' in x else False
98
+ for x in hits['Sector Label']])
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
+ c1, c2 = st.columns([1,1])
101
+ with c1:
102
+ st.write('**Target Paragraphs**: `{}`'.format(count_target))
103
+ st.write('**NetZero Related Paragraphs**: `{}`'.format(count_netzero))
104
+ with c2:
105
+ st.write('**GHG Target Related Paragraphs**: `{}`'.format(count_ghg))
106
+ st.write('**Economy-wide Related Paragraphs**: `{}`'.format(count_economy))
107
+ st.write('-------------------')
108
+ hits = hits.sort_values(by=['Relevancy'], ascending=False)
109
+ netzerohit = hits[hits['Netzero Label'] == 'NETZERO']
110
+ if not netzerohit.empty:
111
+ netzerohit = netzerohit.sort_values(by = ['Netzero Score'], ascending = False)
112
+ # st.write('-------------------')
113
+ # st.markdown("###### Netzero paragraph ######")
114
+ st.write('**Netzero paragraph** `page {}`: {}'.format(netzerohit.iloc[0]['page'],
115
+ netzerohit.iloc[0]['text'].replace("\n", " ")))
116
+ st.write("")
117
+ else:
118
+ st.info("🤔 No Netzero paragraph found")
 
 
119
 
120
  # # st.write("**Result {}** `page {}` (Relevancy Score: {:.2f})'".format(i+1,hits.iloc[i]['page'],hits.iloc[i]['Relevancy'])")
121
  # st.write('-------------------')
 
143
  # file_name= os.path.splitext(st.session_state['filename'])[0]+'.xlsx')
144
 
145
  # else:
146
+ # st.info("🤔 No Targets found")
147
+ # count_df = df['Target Label'].value_counts()
148
+ # count_df = count_df.rename('count')
149
+ # count_df = count_df.rename_axis('Target Label').reset_index()
150
+ # count_df['Label_def'] = count_df['Target Label'].apply(lambda x: _lab_dict[x])
151
+ # st.plotly_chart(fig,use_container_width= True)
152
+
153
+ # count_netzero = sum(hits['Netzero Label'] == 'NETZERO')
154
+ # count_ghg = sum(hits['GHG Label'] == 'LABEL_2')
155
+ # count_economy = sum([True if 'Economy-wide' in x else False
156
+ # for x in hits['Sector Label']])
157
+ # # excel part
158
+ # temp = df[df['Relevancy']>threshold]
159
+
160
+ # df['Validation'] = 'No'
161
+ # df_xlsx = to_excel(df)
162
+ # st.download_button(label='📥 Download Current Result',
163
+ # data=df_xlsx ,
164
+ # file_name= 'file_target.xlsx')