Truong-Phuc committed on
Commit
88424d7
•
1 Parent(s): 9a9bf9e

Update Upload file function

Browse files
Files changed (1) hide show
  1. app.py +42 -40
app.py CHANGED
@@ -1,64 +1,66 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import re
4
- import base64
5
 
6
- st.set_page_config(layout='wide')
7
 
8
- def load_data(file):
9
- if file is not None:
10
- return pd.read_csv(file)
 
11
  else:
12
  return pd.DataFrame(columns=['context', 'question', 'answer'])
 
 
 
 
 
13
 
14
- def download_csv(dataframe):
15
- csv = dataframe.to_csv(index=False)
16
- b64 = base64.b64encode(csv.encode()).decode()
17
- href = f'<a href="data:file/csv;base64,{b64}" download="checked_data.csv">Download CSV File</a>'
18
- st.markdown(href, unsafe_allow_html=True)
19
 
20
- uploaded_file = st.file_uploader("Upload CSV file", type=['csv'])
 
21
 
22
- df = load_data(uploaded_file)
 
23
 
24
  if 'idx' not in st.session_state:
25
  st.session_state.idx = 0
26
 
27
- st.markdown("<h1 style='text-align: center;'>Investigation Legal Documents Dataset Checker</h1>", unsafe_allow_html=True)
28
 
29
  col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10 = st.columns([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
30
- btn_prev = col_1.button(label='Previous sample', use_container_width=True)
31
- btn_next = col_2.button(label='Next sample', use_container_width=True)
32
- btn_save = col_3.button(label='Save changes', use_container_width=True)
33
- btn_download = col_4.button(label='Download CSV', use_container_width=True)
34
 
35
- if btn_prev:
36
- if st.session_state.idx > 0:
37
- st.session_state.idx -= 1
 
38
 
39
- if btn_next:
40
- if st.session_state.idx < len(df) - 1:
41
- st.session_state.idx += 1
42
 
43
- st.markdown(f"<h3 style='text-align: center;'>Sample: {st.session_state.idx+1}/{len(df)}</h3>", unsafe_allow_html=True)
 
 
44
 
45
- if not df.empty:
46
- context = st.text_area(label='Your context: ', value=df['context'][st.session_state.idx], height=300)
47
- question = st.text_area(label='Your question: ', value=df['question'][st.session_state.idx], height=100)
48
- answer = st.text_area(label='Your answer: ', value=df['answer'][st.session_state.idx], height=100)
49
-
50
- if answer.strip() and context.strip():
51
- highlighted_context = re.sub(re.escape(answer), "<mark>" + answer + "</mark>", context, flags=re.IGNORECASE)
52
  st.markdown(highlighted_context, unsafe_allow_html=True)
53
 
54
- if btn_save:
55
- df.loc[st.session_state.idx, 'context'] = context
56
- df.loc[st.session_state.idx, 'question'] = question
57
- df.loc[st.session_state.idx, 'answer'] = answer
58
 
59
- if uploaded_file is not None:
60
- uploaded_file.seek(0)
61
- df.to_csv(uploaded_file, index=False)
 
62
 
63
- if btn_download:
64
- download_csv(df)
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import re
 
4
 
5
+ st.set_page_config(page_icon='🃏', page_title='MRC for Legal Document Dataset checker', layout='wide', initial_sidebar_state="collapsed")
6
 
7
+ # start processing events
8
+ def load_data(file_uploader):
9
+ if file_uploader is not None:
10
+ return pd.read_csv(file_uploader)
11
  else:
12
  return pd.DataFrame(columns=['context', 'question', 'answer'])
13
+
14
+ def convert_df(df):
15
+ # IMPORTANT: Cache the conversion to prevent computation on every rerun
16
+ return df.to_csv().encode("utf-8")
17
+ # end processing events
18
 
19
+ st.markdown("<h1 style='text-align: center;'>Investigation Legal Dataset checker for Machine Reading Comprehension</h1>", unsafe_allow_html=True)
 
 
 
 
20
 
21
+ file = st.file_uploader(label='Upload your file here:', type=['csv'], accept_multiple_files=False, label_visibility='hidden')
22
+ df = load_data(file_uploader=file)
23
 
24
+ if 'df' not in st.session_state:
25
+ st.session_state.df = df
26
 
27
  if 'idx' not in st.session_state:
28
  st.session_state.idx = 0
29
 
30
+ st.markdown(f"<h3 style='text-align: center;'>Sample {st.session_state.idx + 1}/{len(df)}</h3>", unsafe_allow_html=True)
31
 
32
  col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10 = st.columns([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
 
 
 
 
33
 
34
+ btn_previous = col_1.button(label=':arrow_backward: Previous sample', use_container_width=True)
35
+ btn_next = col_2.button(label='Next sample :arrow_forward:', use_container_width=True)
36
+ btn_save = col_3.button(label=':heavy_check_mark: Save change', use_container_width=True)
37
+ # txt_goto = col_4.selectbox(label='None', options=[np.array(range(len(df)))], label_visibility='collapsed')
38
 
39
+ if len(df) != 0:
40
+ index = st.session_state.idx
 
41
 
42
+ txt_context = st.text_area(height=300, label='Your context:', value=st.session_state.df.loc[index, 'context'])
43
+ txt_question = st.text_area(height=100, label='Your question:', value=st.session_state.df.loc[index, 'question'])
44
+ txt_answer = st.text_area(height=100, label='Your answer:', value=st.session_state.df.loc[index, 'answer'])
45
 
46
+ if txt_answer.strip() and txt_context.strip():
47
+ highlighted_context = re.sub(re.escape(txt_answer), "<mark>" + txt_answer + "</mark>", txt_context, flags=re.IGNORECASE)
 
 
 
 
 
48
  st.markdown(highlighted_context, unsafe_allow_html=True)
49
 
 
 
 
 
50
 
51
+ if btn_next:
52
+ if index < len(df) - 1:
53
+ st.session_state.idx += 1
54
+ st.rerun()
55
 
56
+ if btn_save:
57
+ st.session_state.df.loc[st.session_state.idx, 'context'] = txt_context
58
+ st.session_state.df.loc[st.session_state.idx, 'question'] = txt_question
59
+ st.session_state.df.loc[st.session_state.idx, 'answer'] = txt_answer
60
+ csv_file = convert_df(df=st.session_state.df)
61
+ btn_download = col_4.download_button(data=csv_file, label=':arrow_down_small: Download file', use_container_width=True, file_name="large_df.csv", mime="text/csv")
62
+
63
+ if btn_previous:
64
+ if index > 0:
65
+ st.session_state.idx -= 1
66
+ st.rerun()