target stats
Browse files- appStore/target.py +54 -66
appStore/target.py
CHANGED
@@ -59,19 +59,6 @@ def to_excel(df):
|
|
59 |
return processed_data
|
60 |
|
61 |
def app():
|
62 |
-
|
63 |
-
#### APP INFO #####
|
64 |
-
# st.write(
|
65 |
-
# """
|
66 |
-
# The **Target Extraction** app is an easy-to-use interface built \
|
67 |
-
# in Streamlit for analyzing policy documents for \
|
68 |
-
# Classification of the paragraphs/texts in the document *If it \
|
69 |
-
# contains any Economy-Wide Targets related information* - \
|
70 |
-
# developed by GIZ Data Service Center, GFA, IKI Tracs, \
|
71 |
-
# SV Klima and SPA. \n
|
72 |
-
# """)
|
73 |
-
|
74 |
-
|
75 |
### Main app code ###
|
76 |
with st.container():
|
77 |
if 'key0' in st.session_state:
|
@@ -88,64 +75,47 @@ def app():
|
|
88 |
df = target_classification(haystack_doc=df,
|
89 |
threshold= params['threshold'])
|
90 |
st.session_state.key1 = df
|
91 |
-
|
92 |
-
# # excel part
|
93 |
-
# temp = df[df['Relevancy']>threshold]
|
94 |
-
|
95 |
-
# df['Validation'] = 'No'
|
96 |
-
# df_xlsx = to_excel(df)
|
97 |
-
# st.download_button(label='📥 Download Current Result',
|
98 |
-
# data=df_xlsx ,
|
99 |
-
# file_name= 'file_target.xlsx')
|
100 |
|
101 |
def target_display():
|
102 |
if 'key1' in st.session_state:
|
103 |
-
df = st.session_state.key1
|
104 |
-
|
|
|
105 |
|
|
|
|
|
|
|
|
|
|
|
106 |
hits = df[df['Target Label'] == 'TARGET']
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
# for x in hits['Sector Label']])
|
116 |
-
|
117 |
-
# # count_df = df['Target Label'].value_counts()
|
118 |
-
# # count_df = count_df.rename('count')
|
119 |
-
# # count_df = count_df.rename_axis('Target Label').reset_index()
|
120 |
-
# # count_df['Label_def'] = count_df['Target Label'].apply(lambda x: _lab_dict[x])
|
121 |
-
|
122 |
-
# # fig = px.bar(count_df, y="Label_def", x="count", orientation='h', height=200)
|
123 |
-
# c1, c2 = st.columns([1,1])
|
124 |
-
# with c1:
|
125 |
-
# st.write('**Target Paragraphs**: `{}`'.format(count_target))
|
126 |
-
# st.write('**NetZero Related Paragraphs**: `{}`'.format(count_netzero))
|
127 |
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
# else:
|
148 |
-
# st.info("🤔 No Netzero paragraph found")
|
149 |
|
150 |
# # st.write("**Result {}** `page {}` (Relevancy Score: {:.2f})'".format(i+1,hits.iloc[i]['page'],hits.iloc[i]['Relevancy'])")
|
151 |
# st.write('-------------------')
|
@@ -173,4 +143,22 @@ def target_display():
|
|
173 |
# file_name= os.path.splitext(st.session_state['filename'])[0]+'.xlsx')
|
174 |
|
175 |
# else:
|
176 |
-
# st.info("🤔 No Targets found")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
return processed_data
|
60 |
|
61 |
def app():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
### Main app code ###
|
63 |
with st.container():
|
64 |
if 'key0' in st.session_state:
|
|
|
75 |
df = target_classification(haystack_doc=df,
|
76 |
threshold= params['threshold'])
|
77 |
st.session_state.key1 = df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
def target_display():
|
80 |
if 'key1' in st.session_state:
|
81 |
+
df = st.session_state.key1
|
82 |
+
st.info('**{}** with ~{} pages is splitted into {} paragraphs/text chunks (page number is **True** only for pdf files)'\
|
83 |
+
.format(os.path.basename(st.session_state['filename']),st.session_state['pages'], len(df)), icon="ℹ️")
|
84 |
|
85 |
+
st.caption("""Some Targets are an intention to achieve a specific result, \
|
86 |
+
for example, to reduce GHG emissions to a specific level \
|
87 |
+
(a GHG target) or increase energy efficiency or renewable \
|
88 |
+
energy to a specific level (a non-GHG target), typically by \
|
89 |
+
a certain date""")
|
90 |
hits = df[df['Target Label'] == 'TARGET']
|
91 |
+
range_val = min(5,len(hits))
|
92 |
+
if range_val !=0:
|
93 |
+
# collecting some statistics
|
94 |
+
count_target = sum(hits['Target Label'] == 'TARGET')
|
95 |
+
count_netzero = sum(hits['Netzero Label'] == 'NETZERO')
|
96 |
+
count_ghg = sum(hits['GHG Label'] == 'GHG')
|
97 |
+
count_economy = sum([True if 'Economy-wide' in x else False
|
98 |
+
for x in hits['Sector Label']])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
+
c1, c2 = st.columns([1,1])
|
101 |
+
with c1:
|
102 |
+
st.write('**Target Paragraphs**: `{}`'.format(count_target))
|
103 |
+
st.write('**NetZero Related Paragraphs**: `{}`'.format(count_netzero))
|
104 |
+
with c2:
|
105 |
+
st.write('**GHG Target Related Paragraphs**: `{}`'.format(count_ghg))
|
106 |
+
st.write('**Economy-wide Related Paragraphs**: `{}`'.format(count_economy))
|
107 |
+
st.write('-------------------')
|
108 |
+
hits = hits.sort_values(by=['Relevancy'], ascending=False)
|
109 |
+
netzerohit = hits[hits['Netzero Label'] == 'NETZERO']
|
110 |
+
if not netzerohit.empty:
|
111 |
+
netzerohit = netzerohit.sort_values(by = ['Netzero Score'], ascending = False)
|
112 |
+
# st.write('-------------------')
|
113 |
+
# st.markdown("###### Netzero paragraph ######")
|
114 |
+
st.write('**Netzero paragraph** `page {}`: {}'.format(netzerohit.iloc[0]['page'],
|
115 |
+
netzerohit.iloc[0]['text'].replace("\n", " ")))
|
116 |
+
st.write("")
|
117 |
+
else:
|
118 |
+
st.info("🤔 No Netzero paragraph found")
|
|
|
|
|
119 |
|
120 |
# # st.write("**Result {}** `page {}` (Relevancy Score: {:.2f})'".format(i+1,hits.iloc[i]['page'],hits.iloc[i]['Relevancy'])")
|
121 |
# st.write('-------------------')
|
|
|
143 |
# file_name= os.path.splitext(st.session_state['filename'])[0]+'.xlsx')
|
144 |
|
145 |
# else:
|
146 |
+
# st.info("🤔 No Targets found")
|
147 |
+
# count_df = df['Target Label'].value_counts()
|
148 |
+
# count_df = count_df.rename('count')
|
149 |
+
# count_df = count_df.rename_axis('Target Label').reset_index()
|
150 |
+
# count_df['Label_def'] = count_df['Target Label'].apply(lambda x: _lab_dict[x])
|
151 |
+
# st.plotly_chart(fig,use_container_width= True)
|
152 |
+
|
153 |
+
# count_netzero = sum(hits['Netzero Label'] == 'NETZERO')
|
154 |
+
# count_ghg = sum(hits['GHG Label'] == 'LABEL_2')
|
155 |
+
# count_economy = sum([True if 'Economy-wide' in x else False
|
156 |
+
# for x in hits['Sector Label']])
|
157 |
+
# # excel part
|
158 |
+
# temp = df[df['Relevancy']>threshold]
|
159 |
+
|
160 |
+
# df['Validation'] = 'No'
|
161 |
+
# df_xlsx = to_excel(df)
|
162 |
+
# st.download_button(label='📥 Download Current Result',
|
163 |
+
# data=df_xlsx ,
|
164 |
+
# file_name= 'file_target.xlsx')
|