File size: 3,908 Bytes
a578005
80f304b
a578005
 
88424d7
a578005
cab36fb
00b5c9c
936dcde
a578005
 
2f36d79
a578005
cab36fb
a578005
 
 
cab36fb
 
 
5c7750f
cab36fb
a578005
88424d7
5c7750f
cab36fb
 
 
a578005
440b9c5
 
 
 
5c7750f
440b9c5
 
 
5c7750f
88424d7
 
00b5c9c
 
80f304b
 
 
 
 
 
9ad151d
88424d7
80f304b
88424d7
80f304b
 
 
 
88424d7
80f304b
 
 
5c7750f
440b9c5
 
 
cab36fb
abd2dd9
5c7750f
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import pandas as pd
import streamlit as st
import re

# Streamlit page for reviewing and correcting MRC (machine reading
# comprehension) records. Each record is a context / question / answer triple
# read from a CSV file with 'context', 'question' and 'answer' columns.
st.set_page_config(page_icon='🍃', page_title='MRC for Legal Document Dataset checker', layout='wide', initial_sidebar_state="collapsed")

st.markdown("<h2 style='text-align: center;'>MRC Dataset Checker</h2>", unsafe_allow_html=True)


def _cell_text(value):
    """Return a CSV cell as editable text, mapping missing values to ''."""
    # Without this, a NaN cell is shown in the editor as the literal "nan".
    return '' if pd.isna(value) else str(value)


# Streamlit re-executes this script on every interaction. Keeping the
# DataFrame in session state means edits saved for one record are still
# present when another record is saved later, instead of being discarded by
# re-reading the source CSV on each rerun.
if 'df' not in st.session_state:
    st.session_state.df = pd.read_csv(filepath_or_buffer='./dup_non-span.csv')
df = st.session_state.df

# Zero-based position of the record currently being edited.
if 'idx' not in st.session_state:
    st.session_state.idx = 0

st.markdown(f"<h4 style='text-align: center;'>Bản ghi {st.session_state.idx + 1}/{len(df)}</h4>", unsafe_allow_html=True)

# Ten equal-width columns; only the first six hold controls, the rest are
# spacers that keep the toolbar compact on a wide layout.
col_1, col_2, col_3, col_4, col_5, col_6, *_ = st.columns([1] * 10)

btn_previous = col_1.button(label=':arrow_backward: Bản ghi trước', use_container_width=True)
btn_next = col_2.button(label='Bản ghi sau :arrow_forward:', use_container_width=True)
btn_save = col_3.button(label=':heavy_check_mark: Lưu thay đổi', use_container_width=True)
# The select box is one-based to match the record counter shown to the user.
txt_goto = col_5.selectbox(label='Sample', label_visibility='collapsed', options=list(range(1, len(df) + 1)))
btn_goto = col_6.button(label=':fast_forward: Chuyển đến', use_container_width=True)

if len(df) != 0:
    current = st.session_state.idx

    col_x1, col_x2 = st.columns([8.5, 1.5])
    txt_context = col_x1.text_area(height=300, label='Ngữ cảnh:', value=_cell_text(df.at[current, 'context']))
    txt_question = st.text_area(height=100, label='Câu hỏi:', value=_cell_text(df.at[current, 'question']))
    txt_answer = st.text_area(height=100, label='Câu trả lời:', value=_cell_text(df.at[current, 'answer']))

    # NOTE: disabled per-criteria rating widgets, kept verbatim for reference.
    # options = ['Never been evaluated', 'Tệ', 'Chấp nhận được', 'Tốt']
    # criteria_1_value = df['criteria_1'][st.session_state.idx] if 'criteria_1' in df.columns else 'Never been evaluated'
    # criteria_2_value = df['criteria_2'][st.session_state.idx] if 'criteria_2' in df.columns else 'Never been evaluated'
    # criteria_3_value = df['criteria_3'][st.session_state.idx] if 'criteria_3' in df.columns else 'Never been evaluated'

    # criteria_1 = col_x2.selectbox(label='Câu hỏi có tự nhiên, bao quát, phù hợp với nội dung không?', options=['Never been evaluated', 'Tệ', 'Chấp nhận được', 'Tốt'], index=options.index(criteria_1_value))
    # criteria_2 = col_x2.selectbox(label='Is the answer correct, clear, and fluent?', options=['Never been evaluated', 'Tệ', 'Chấp nhận được', 'Tốt'], index=options.index(criteria_2_value))
    # criteria_3 = col_x2.selectbox(label='Do the question and answer pairs match each other?', options=['Never been evaluated', 'Tệ', 'Chấp nhận được', 'Tốt'], index=options.index(criteria_3_value))

    # Preview the context with every case-insensitive occurrence of the
    # answer highlighted.
    if txt_answer.strip() and txt_context.strip():
        # A function replacement wraps the text that actually matched, so the
        # context keeps its own casing, and backslashes in the answer are not
        # interpreted as regex replacement escapes or group references.
        highlighted_context = re.sub(
            re.escape(txt_answer),
            lambda match: f"<mark>{match.group(0)}</mark>",
            txt_context,
            flags=re.IGNORECASE,
        )
        st.markdown(highlighted_context, unsafe_allow_html=True)

    # Step back one record unless already at the first one.
    if btn_previous and current > 0:
        st.session_state.idx -= 1
        st.rerun()

    # Step forward one record unless already at the last one. The bound is
    # strict: the last valid index is len(df) - 1, and moving past it would
    # make the next rerun fail when looking up the record.
    if btn_next and current < len(df) - 1:
        st.session_state.idx += 1
        st.rerun()

    if btn_save:
        # Write through .at rather than chained indexing (df[col][row] = ...),
        # which may assign to a temporary copy and leave df unchanged.
        df.at[current, 'context'] = txt_context
        df.at[current, 'question'] = txt_question
        df.at[current, 'answer'] = txt_answer

        # df['criteria_1'][st.session_state.idx] = criteria_1
        # df['criteria_2'][st.session_state.idx] = criteria_2
        # df['criteria_3'][st.session_state.idx] = criteria_3

        # Offer the updated data for download and also persist it on disk.
        # Both outputs omit the index column so they have the same layout.
        col_4.download_button(data=df.to_csv(index=False), label=':arrow_down_small: Tải file', use_container_width=True, file_name="checked.csv", mime="text/csv")
        df.to_csv(path_or_buf='./LegalAbstractiveA_checked.csv', index=False)

    if btn_goto:
        # Convert the one-based selection back to a zero-based position.
        st.session_state.idx = txt_goto - 1
        st.rerun()