Spaces:
Sleeping
Sleeping
File size: 3,901 Bytes
a578005 80f304b a578005 88424d7 a578005 cab36fb 00b5c9c abd2dd9 a578005 2f36d79 a578005 cab36fb a578005 cab36fb 5c7750f cab36fb a578005 88424d7 5c7750f cab36fb a578005 cab36fb 5c7750f cab36fb 5c7750f 88424d7 00b5c9c 80f304b 9ad151d 88424d7 80f304b 88424d7 80f304b 88424d7 80f304b 5c7750f cab36fb abd2dd9 5c7750f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import pandas as pd
import streamlit as st
import re
# ---------------------------------------------------------------------------
# MRC dataset checker: a single-page Streamlit app for reviewing the records
# of a CSV file (context / question / answer), rating each record on three
# criteria and writing the edits back to the same CSV file.
# ---------------------------------------------------------------------------

# The CSV file that is both loaded on every run and overwritten on save.
CSV_PATH = './LegalAbstractiveA_checked.csv'

# Rating shown for a record that has no stored rating yet; it is deliberately
# the first entry of RATING_OPTIONS so that index 0 is the fallback.
DEFAULT_RATING = 'Never been evaluated'
RATING_OPTIONS = [DEFAULT_RATING, 'Tệ', 'Chấp nhận được', 'Tốt']
CRITERIA_COLUMNS = ('criteria_1', 'criteria_2', 'criteria_3')


def _cell_text(value) -> str:
    """Return a CSV cell as text, mapping missing (NaN/None) cells to ''."""
    return '' if pd.isna(value) else str(value)


def _rating_index(frame: pd.DataFrame, column: str, row: int) -> int:
    """Return the position in RATING_OPTIONS of the rating stored for *row*.

    Falls back to 0 (DEFAULT_RATING) when the column does not exist or the
    stored value is empty or not one of the known options, instead of letting
    ``list.index`` raise ``ValueError``.
    """
    if column not in frame.columns:
        return 0
    try:
        return RATING_OPTIONS.index(frame.at[row, column])
    except ValueError:
        return 0


def _highlight_answer(context: str, answer: str) -> str:
    """Wrap every case-insensitive occurrence of *answer* in ``<mark>`` tags.

    A function is used as the replacement so that backslashes in the answer
    are never interpreted as regex escapes, and so that the highlighted text
    keeps the casing it has in *context*.
    """
    return re.sub(
        re.escape(answer),
        lambda match: f"<mark>{match.group(0)}</mark>",
        context,
        flags=re.IGNORECASE,
    )


st.set_page_config(page_icon='🍃', page_title='MRC for Legal Document Dataset checker', layout='wide', initial_sidebar_state="collapsed")
st.markdown("<h2 style='text-align: center;'>MRC Dataset Checker</h2>", unsafe_allow_html=True)

df = pd.read_csv(filepath_or_buffer=CSV_PATH)

# `idx` is the zero-based position of the record currently being reviewed.
if 'idx' not in st.session_state:
    st.session_state.idx = 0
if not df.empty:
    # Keep the stored position inside the data even if it went stale
    # (e.g. the file shrank, or an earlier run stepped past the last record).
    st.session_state.idx = max(0, min(st.session_state.idx, len(df) - 1))

st.markdown(f"<h4 style='text-align: center;'>Bản ghi {st.session_state.idx + 1}/{len(df)}</h4>", unsafe_allow_html=True)

# Toolbar: ten equally wide slots, of which only the first six are used.
col_1, col_2, col_3, col_4, col_5, col_6, *_ = st.columns(10)
btn_previous = col_1.button(label=':arrow_backward: Bản ghi trước', use_container_width=True)
btn_next = col_2.button(label='Bản ghi sau :arrow_forward:', use_container_width=True)
btn_save = col_3.button(label=':heavy_check_mark: Lưu thay đổi', use_container_width=True)
txt_goto = col_5.selectbox(label='Sample', label_visibility='collapsed', options=list(range(1, len(df) + 1)))
btn_goto = col_6.button(label=':fast_forward: Chuyển đến', use_container_width=True)

# Nothing below makes sense without at least one record: the editor needs a
# row to show and the handlers need the editor's values.
if df.empty:
    st.stop()

row = st.session_state.idx

# Editor: context and ratings side by side, question and answer underneath.
col_x1, col_x2 = st.columns([8.5, 1.5])
txt_context = col_x1.text_area(height=300, label='Ngữ cảnh:', value=_cell_text(df.at[row, 'context']))
txt_question = st.text_area(height=100, label='Câu hỏi:', value=_cell_text(df.at[row, 'question']))
txt_answer = st.text_area(height=100, label='Câu trả lời:', value=_cell_text(df.at[row, 'answer']))

criteria_1 = col_x2.selectbox(label='Câu hỏi có tự nhiên, bao quát, phù hợp với nội dung không?', options=RATING_OPTIONS, index=_rating_index(df, 'criteria_1', row))
criteria_2 = col_x2.selectbox(label='Is the answer correct, clear, and fluent?', options=RATING_OPTIONS, index=_rating_index(df, 'criteria_2', row))
criteria_3 = col_x2.selectbox(label='Do the question and answer pairs match each other?', options=RATING_OPTIONS, index=_rating_index(df, 'criteria_3', row))

# Preview of the context with the answer highlighted wherever it occurs.
# NOTE(review): the text is rendered with unsafe_allow_html=True, so any HTML
# present in the dataset is rendered as-is — confirm the data is trusted.
if txt_answer.strip() and txt_context.strip():
    st.markdown(_highlight_answer(txt_context, txt_answer), unsafe_allow_html=True)

# Navigation. "Next" must stop at the last record (strict `<`); allowing
# idx == len(df) - 1 to advance would point past the end of the data.
if btn_previous and row > 0:
    st.session_state.idx -= 1
    st.rerun()

if btn_next and row < len(df) - 1:
    st.session_state.idx += 1
    st.rerun()

if btn_save:
    # The rating columns are optional in the input file; create them on first
    # save so the assignments below cannot fail on a missing column.
    for column in CRITERIA_COLUMNS:
        if column not in df.columns:
            df[column] = DEFAULT_RATING

    edits = {
        'context': txt_context,
        'question': txt_question,
        'answer': txt_answer,
        'criteria_1': criteria_1,
        'criteria_2': criteria_2,
        'criteria_3': criteria_3,
    }
    for column, value in edits.items():
        # A column that was entirely empty is loaded as float; make it able
        # to hold text before storing a string in it.
        df[column] = df[column].astype(object)
        # Single-step label assignment: chained `df[col][row] = ...` may
        # write to a temporary copy and leave the frame unchanged.
        df.at[row, column] = value

    df.to_csv(path_or_buf=CSV_PATH, index=False)

# Rendered on every run rather than only inside the save branch: a widget
# created under `if button:` vanishes on the next rerun. The export omits the
# index column so that it matches the file written on save.
btn_download = col_4.download_button(data=df.to_csv(index=False), label=':arrow_down_small: Tải file', use_container_width=True, file_name="checked.csv", mime="text/csv")

if btn_goto:
    # The selector is one-based, the stored position zero-based.
    st.session_state.idx = txt_goto - 1
    st.rerun()