Truong-Phuc Nguyen committed on
Commit
a578005
·
verified ·
1 Parent(s): ab7c707

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +46 -0
  2. train.csv +0 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import re
4
+
5
# Configure the page with the 'wide' layout before any other Streamlit call.
st.set_page_config(
    layout='wide',
)
6
+
7
def load_data(path='./train.csv') -> pd.DataFrame:
    """Load the dataset CSV into a DataFrame.

    Args:
        path: Location of the CSV file to read. Defaults to ``./train.csv``,
            the file this app reads and later overwrites when saving.

    Returns:
        The parsed CSV as a ``pandas.DataFrame``.
    """
    # Deliberately not cached: the script writes edits back to this file, and
    # a fresh read is what makes saved changes visible afterwards.
    return pd.read_csv(filepath_or_buffer=path)
9
+
10
# ---------------------------------------------------------------------------
# Dataset checker page: shows one CSV row at a time (columns
# 'relevant_articles', 'text' and 'answer'), lets the user edit the three
# fields, highlights the answer inside the context, and writes the edited row
# back to ./train.csv when "Save changes" is pressed.
# ---------------------------------------------------------------------------


def _cell_text(value):
    """Return a CSV cell as text, mapping missing values (NaN/None) to ''."""
    # Without this, an empty cell would be shown (and highlighted) as "nan".
    return '' if pd.isna(value) else str(value)


def _highlight_answer(context, answer):
    """Wrap each case-insensitive occurrence of *answer* in *context* in <mark>."""
    # A callable replacement is used instead of a template string so that
    # backslashes in the answer are not interpreted as escapes or group
    # references, and so the matched text keeps the casing found in the
    # context rather than being overwritten with the answer's casing.
    return re.sub(
        re.escape(answer),
        lambda match: "<mark>" + match.group(0) + "</mark>",
        context,
        flags=re.IGNORECASE,
    )


df = load_data()

# With no rows there is nothing to index into; stop before the row lookups.
if df.empty:
    st.warning('The dataset is empty: there is no sample to check.')
    st.stop()

# Index of the sample currently shown, kept in the session state.
if 'idx' not in st.session_state:
    st.session_state.idx = 0

st.markdown("<h1 style='text-align: center;'>Investigation Legal Documents Dataset Checker</h1>", unsafe_allow_html=True)

# Ten equal-width columns; only the first three hold buttons, the remaining
# ones are left empty so the buttons stay narrow.
col_1, col_2, col_3, *_ = st.columns([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
btn_prev = col_1.button(label='Previous sample', use_container_width=True)
btn_next = col_2.button(label='Next sample', use_container_width=True)
btn_save = col_3.button(label='Save changes', use_container_width=True)

# Navigation is bounded to the valid row range [0, len(df) - 1].
if btn_prev and st.session_state.idx > 0:
    st.session_state.idx -= 1

if btn_next and st.session_state.idx < len(df) - 1:
    st.session_state.idx += 1

st.markdown(f"<h3 style='text-align: center;'>Sample: {st.session_state.idx+1}/{len(df)}</h3>", unsafe_allow_html=True)

# Editable fields, pre-filled from the current row.
context = st.text_area(label='Your context: ', value=_cell_text(df.at[st.session_state.idx, 'relevant_articles']), height=300)
question = st.text_area(label='Your question: ', value=_cell_text(df.at[st.session_state.idx, 'text']), height=100)
answer = st.text_area(label='Your answer: ', value=_cell_text(df.at[st.session_state.idx, 'answer']), height=100)

# Show the context with the answer highlighted, when both are non-blank.
# NOTE(review): the context is rendered with unsafe_allow_html=True, so any
# HTML present in the data is rendered as markup — confirm this is intended.
if answer.strip() and context.strip():
    highlighted_context = _highlight_answer(context, answer)
    st.markdown(highlighted_context, unsafe_allow_html=True)

# Persist the (possibly edited) fields of the current row back to the CSV.
if btn_save:
    df.loc[st.session_state.idx, 'relevant_articles'] = context
    df.loc[st.session_state.idx, 'text'] = question
    df.loc[st.session_state.idx, 'answer'] = answer

    df.to_csv('./train.csv', index=False)
train.csv ADDED
The diff for this file is too large to render. See raw diff