Spaces:

Truong-Phuc
/

MRC_Dataset_Checker

Running

App Files Files Community

Truong-Phuc committed on May 6

Commit

88424d7

•

1 Parent(s): 9a9bf9e

Update Upload file function

Browse files

Files changed (1) hide show

app.py +42 -40

app.py CHANGED Viewed

@@ -1,64 +1,66 @@
 import streamlit as st
 import pandas as pd
 import re
-import base64
-st.set_page_config(layout='wide')
-def load_data(file):
-    if file is not None:
-        return pd.read_csv(file)
     else:
         return pd.DataFrame(columns=['context', 'question', 'answer'])
-def download_csv(dataframe):
-    csv = dataframe.to_csv(index=False)
-    b64 = base64.b64encode(csv.encode()).decode()
-    href = f'<a href="data:file/csv;base64,{b64}" download="checked_data.csv">Download CSV File</a>'
-    st.markdown(href, unsafe_allow_html=True)
-uploaded_file = st.file_uploader("Upload CSV file", type=['csv'])
-df = load_data(uploaded_file)
 if 'idx' not in st.session_state:
     st.session_state.idx = 0
-st.markdown("<h1 style='text-align: center;'>Investigation Legal Documents Dataset Checker</h1>", unsafe_allow_html=True)
 col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10 = st.columns([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
-btn_prev = col_1.button(label='Previous sample', use_container_width=True)
-btn_next = col_2.button(label='Next sample', use_container_width=True)
-btn_save = col_3.button(label='Save changes', use_container_width=True)
-btn_download = col_4.button(label='Download CSV', use_container_width=True)
-if btn_prev:
-    if st.session_state.idx > 0:
-        st.session_state.idx -= 1
-if btn_next:
-    if st.session_state.idx < len(df) - 1:
-        st.session_state.idx += 1
-st.markdown(f"<h3 style='text-align: center;'>Sample: {st.session_state.idx+1}/{len(df)}</h3>", unsafe_allow_html=True)
-if not df.empty:
-    context = st.text_area(label='Your context: ', value=df['context'][st.session_state.idx], height=300)
-    question = st.text_area(label='Your question: ', value=df['question'][st.session_state.idx], height=100)
-    answer = st.text_area(label='Your answer: ', value=df['answer'][st.session_state.idx], height=100)
-    if answer.strip() and context.strip():
-        highlighted_context = re.sub(re.escape(answer), "<mark>" + answer + "</mark>", context, flags=re.IGNORECASE)
         st.markdown(highlighted_context, unsafe_allow_html=True)
-    if btn_save:
-        df.loc[st.session_state.idx, 'context'] = context
-        df.loc[st.session_state.idx, 'question'] = question
-        df.loc[st.session_state.idx, 'answer'] = answer
-        if uploaded_file is not None:
-            uploaded_file.seek(0)
-            df.to_csv(uploaded_file, index=False)
-if btn_download:
-    download_csv(df)

 import streamlit as st
 import pandas as pd
 import re
+st.set_page_config(page_icon='🍃', page_title='MRC for Legal Document Dataset checker', layout='wide', initial_sidebar_state="collapsed")
+# start of data helper functions (CSV loading and CSV export)
+def load_data(file_uploader):
+    if file_uploader is not None:
+        return pd.read_csv(file_uploader)
     else:
         return pd.DataFrame(columns=['context', 'question', 'answer'])
+def convert_df(df):
+    # IMPORTANT: Cache the conversion to prevent computation on every rerun
+    return df.to_csv().encode("utf-8")
+# end of data helper functions
+st.markdown("<h1 style='text-align: center;'>Investigation Legal Dataset checker for Machine Reading Comprehension</h1>", unsafe_allow_html=True)
+file = st.file_uploader(label='Upload your file here:', type=['csv'], accept_multiple_files=False, label_visibility='hidden')
+df = load_data(file_uploader=file)
+if 'df' not in st.session_state:
+    st.session_state.df = df
 if 'idx' not in st.session_state:
     st.session_state.idx = 0
+st.markdown(f"<h3 style='text-align: center;'>Sample {st.session_state.idx + 1}/{len(df)}</h3>", unsafe_allow_html=True)
 col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10 = st.columns([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
+btn_previous = col_1.button(label=':arrow_backward: Previous sample', use_container_width=True)
+btn_next = col_2.button(label='Next sample :arrow_forward:', use_container_width=True)
+btn_save = col_3.button(label=':heavy_check_mark: Save change', use_container_width=True)
+# txt_goto = col_4.selectbox(label='None', options=[np.array(range(len(df)))], label_visibility='collapsed')
+if len(df) != 0:
+    index = st.session_state.idx
+    txt_context = st.text_area(height=300, label='Your context:', value=st.session_state.df.loc[index, 'context'])
+    txt_question = st.text_area(height=100, label='Your question:', value=st.session_state.df.loc[index, 'question'])
+    txt_answer = st.text_area(height=100, label='Your answer:', value=st.session_state.df.loc[index, 'answer'])
+    if txt_answer.strip() and txt_context.strip():
+        highlighted_context = re.sub(re.escape(txt_answer), "<mark>" + txt_answer + "</mark>", txt_context, flags=re.IGNORECASE)
         st.markdown(highlighted_context, unsafe_allow_html=True)
+    if btn_next:
+        if index < len(df) - 1:
+            st.session_state.idx += 1
+            st.rerun()
+    if btn_save:
+        st.session_state.df.loc[st.session_state.idx, 'context'] = txt_context
+        st.session_state.df.loc[st.session_state.idx, 'question'] = txt_question
+        st.session_state.df.loc[st.session_state.idx, 'answer'] = txt_answer
+        csv_file = convert_df(df=st.session_state.df)
+        btn_download = col_4.download_button(data=csv_file, label=':arrow_down_small: Download file', use_container_width=True, file_name="large_df.csv", mime="text/csv")
+    if btn_previous:
+        if index > 0:
+            st.session_state.idx -= 1
+            st.rerun()