Paula Leonova
commited on
Commit
·
ee24d8b
1
Parent(s):
b1bf232
Append ground truth labels to matched table
Browse files
app.py
CHANGED
@@ -155,6 +155,11 @@ if submit_button or example_button:
|
|
155 |
elif uploaded_csv_text_files is not None:
|
156 |
text_df = pd.read_csv(uploaded_csv_text_files)
|
157 |
|
|
|
|
|
|
|
|
|
|
|
158 |
|
159 |
with st.spinner('Breaking up text into more reasonable chunks (transformers cannot exceed a 1024 token max)...'):
|
160 |
# For each body of text, create text chunks of a certain token size required for the transformer
|
@@ -187,10 +192,7 @@ if submit_button or example_button:
|
|
187 |
kw_df0 = pd.DataFrame.from_dict(kw_dict).reset_index()
|
188 |
kw_df0.rename(columns={'index': 'keyword'}, inplace=True)
|
189 |
kw_df = pd.melt(kw_df0, id_vars=['keyword'], var_name='title', value_name='score').dropna()
|
190 |
-
|
191 |
-
title_element = []
|
192 |
-
else:
|
193 |
-
title_element = ['title']
|
194 |
kw_column_list = ['keyword', 'score']
|
195 |
kw_df = kw_df[kw_df['score'] > 0.25][title_element + kw_column_list].sort_values(title_element + ['score'], ascending=False).reset_index().drop(columns='index')
|
196 |
|
@@ -283,6 +285,14 @@ if submit_button or example_button:
|
|
283 |
else:
|
284 |
label_match_df = labels_full_df.copy()
|
285 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
st.dataframe(label_match_df)
|
287 |
st.download_button(
|
288 |
label="Download data as CSV",
|
@@ -291,33 +301,6 @@ if submit_button or example_button:
|
|
291 |
mime='title_label_sum_full/csv',
|
292 |
)
|
293 |
|
294 |
-
if len(glabels) > 0:
|
295 |
-
gdata = pd.DataFrame({'label': glabels})
|
296 |
-
gdata['is_true_label'] = int(1)
|
297 |
-
|
298 |
-
data2 = pd.merge(data2, gdata, how = 'left', on = ['label'])
|
299 |
-
data2['is_true_label'].fillna(0, inplace = True)
|
300 |
-
|
301 |
-
st.markdown("### Data Table")
|
302 |
-
with st.spinner('Generating a table of results and a download link...'):
|
303 |
-
st.dataframe(data2)
|
304 |
-
|
305 |
-
@st.cache
|
306 |
-
def convert_df(df):
|
307 |
-
# IMPORTANT: Cache the conversion to prevent computation on every rerun
|
308 |
-
return df.to_csv().encode('utf-8')
|
309 |
-
csv = convert_df(data2)
|
310 |
-
st.download_button(
|
311 |
-
label="Download data as CSV",
|
312 |
-
data=csv,
|
313 |
-
file_name='text_labels.csv',
|
314 |
-
mime='text/csv',
|
315 |
-
)
|
316 |
-
# coded_data = base64.b64encode(data2.to_csv(index = False). encode ()).decode()
|
317 |
-
# st.markdown(
|
318 |
-
# f'<a href="data:file/csv;base64, {coded_data}" download = "data.csv">Click here to download the data</a>',
|
319 |
-
# unsafe_allow_html = True
|
320 |
-
# )
|
321 |
|
322 |
if len(glabels) > 0:
|
323 |
st.markdown("### Evaluation Metrics")
|
|
|
155 |
elif uploaded_csv_text_files is not None:
|
156 |
text_df = pd.read_csv(uploaded_csv_text_files)
|
157 |
|
158 |
+
# Which input was used? If text area was used, ignore the 'title'
|
159 |
+
if len(text_input) != 0:
|
160 |
+
title_element = []
|
161 |
+
else:
|
162 |
+
title_element = ['title']
|
163 |
|
164 |
with st.spinner('Breaking up text into more reasonable chunks (transformers cannot exceed a 1024 token max)...'):
|
165 |
# For each body of text, create text chunks of a certain token size required for the transformer
|
|
|
192 |
kw_df0 = pd.DataFrame.from_dict(kw_dict).reset_index()
|
193 |
kw_df0.rename(columns={'index': 'keyword'}, inplace=True)
|
194 |
kw_df = pd.melt(kw_df0, id_vars=['keyword'], var_name='title', value_name='score').dropna()
|
195 |
+
|
|
|
|
|
|
|
196 |
kw_column_list = ['keyword', 'score']
|
197 |
kw_df = kw_df[kw_df['score'] > 0.25][title_element + kw_column_list].sort_values(title_element + ['score'], ascending=False).reset_index().drop(columns='index')
|
198 |
|
|
|
285 |
else:
|
286 |
label_match_df = labels_full_df.copy()
|
287 |
|
288 |
+
# TO DO: ADD Flexibility for csv import
|
289 |
+
if len(glabels) > 0:
|
290 |
+
gdata = pd.DataFrame({'label': glabels})
|
291 |
+
gdata['is_true_label'] = True
|
292 |
+
|
293 |
+
label_match_df = pd.merge(label_match_df, gdata, how = 'left', on = title_element + ['label'])
|
294 |
+
label_match_df['correct_match'].fillna(0, inplace = True)
|
295 |
+
|
296 |
st.dataframe(label_match_df)
|
297 |
st.download_button(
|
298 |
label="Download data as CSV",
|
|
|
301 |
mime='title_label_sum_full/csv',
|
302 |
)
|
303 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
304 |
|
305 |
if len(glabels) > 0:
|
306 |
st.markdown("### Evaluation Metrics")
|