File size: 3,310 Bytes
5bcc73a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d805d95
5bcc73a
 
 
 
 
 
 
 
 
 
 
d805d95
 
 
 
5bcc73a
 
d805d95
5bcc73a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d805d95
5bcc73a
d805d95
5bcc73a
 
 
 
 
 
 
 
 
 
 
d805d95
5bcc73a
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import streamlit as st
from pathlib import Path
from uuid import uuid4
import csv
from datetime import datetime, timezone

from huggingface_hub import CommitScheduler


# Local folder that collects the flagged-row CSV files. It is created as soon
# as this module is evaluated so that later appends never hit a missing folder.
CSV_DATASET_DIR = Path("flagged_rows")
CSV_DATASET_DIR.mkdir(exist_ok=True, parents=True)

# Each evaluation of this module gets its own uniquely named CSV file.
CSV_DATASET_PATH = CSV_DATASET_DIR.joinpath(f"train-{uuid4()}.csv")

# Flipped to True by write_header() once the column header has been emitted.
wrote_header = False


def write_header(writer):
    """Write the CSV column header through *writer* and record that it was done.

    Args:
        writer: An object with a ``writerow`` method, e.g. a ``csv.writer``.

    Side effects:
        Sets the module-level ``wrote_header`` flag to ``True`` after the
        header row has been written.
    """
    global wrote_header

    column_names = [
        "date",
        "grascii",
        "longhand",
        "incorrect_grascii",
        "incorrect_longhand",
        "incorrect_shorthand",
        "improperly_cropped",
        "extraneous_marks",
    ]
    writer.writerow(column_names)
    # Only mark the header as written once the row went out without raising.
    wrote_header = True


# Uploader for the flagged-row CSV files: it is pointed at the local
# CSV_DATASET_DIR folder and at the "data" folder of the dataset repo named
# by the FEEDBACK_REPO secret. Its `lock` is held by report_dialog while rows
# are appended to the CSV file.
# NOTE(review): per the huggingface_hub docs `every` is expressed in
# minutes (so 15 minutes between commits) — confirm against the pinned version.
scheduler = CommitScheduler(
    repo_id=st.secrets.FEEDBACK_REPO,
    repo_type="dataset",
    folder_path=CSV_DATASET_DIR,
    path_in_repo="data",
    every=15,
    token=st.secrets.HF_TOKEN,
)


def _flagged_csv_rows(report, day):
    """Build the CSV records for every row of *report* with a reason ticked.

    Columns are read by position: 0 and 1 are the Grascii and longhand
    values, 3 through 7 are the five flag checkboxes. Rows without any ticked
    checkbox are skipped.

    Args:
        report: The DataFrame returned by the data editor.
        day: The date stored in the first field of every record.

    Returns:
        A list of records shaped like the header emitted by ``write_header``,
        with each flag stored as ``1`` or ``0``.
    """
    records = []
    for values in report.itertuples(index=False, name=None):
        flags = values[3:8]
        if any(flags):
            records.append(
                [day, values[0], values[1], *(1 if flag else 0 for flag in flags)]
            )
    return records


@st.dialog("Flag Results for Review", width="large")
def report_dialog(data):
    """Show a dialog in which rows can be flagged, and store the flagged rows.

    The rows of *data* are displayed in an editable table with five extra
    checkbox columns, one per flagging reason. When "Submit" is pressed,
    every row with at least one ticked checkbox is appended to
    ``CSV_DATASET_PATH`` while holding ``scheduler.lock``. Afterwards
    ``st.session_state["report_submitted"]`` is set and the app is rerun.

    Args:
        data: DataFrame whose columns "0", "1" and "2" are displayed as
            Grascii, Longhand and Shorthand image. It is not modified.
    """
    st.write("Please select one or more reasons for flagging each row:")

    # Work on a copy so the checkbox columns never leak into the caller's data.
    report_df = data.copy()
    for flag_column in ("3", "4", "5", "6", "7"):
        report_df[flag_column] = False

    final_report = st.data_editor(
        report_df,
        hide_index=True,
        column_config={
            "0": "Grascii",
            "1": "Longhand",
            "2": st.column_config.ImageColumn("Shorthand", width="medium"),
            "3": st.column_config.CheckboxColumn("Grascii is incorrect"),
            "4": st.column_config.CheckboxColumn("Longhand is incorrect"),
            "5": st.column_config.CheckboxColumn("Shorthand image is incorrect"),
            "6": st.column_config.CheckboxColumn(
                "Shorthand image is improperly cropped"
            ),
            "7": st.column_config.CheckboxColumn(
                "Shorthand image contains extraneous marks"
            ),
        },
        disabled=["0", "1", "2"],
        use_container_width=True,
    )

    if st.button("Submit"):
        records = _flagged_csv_rows(
            final_report, datetime.now(timezone.utc).date()
        )
        # Touch the file only when there is something to store, so that a
        # submission without flags does not produce a header-only CSV file.
        if records:
            # The lock is held for the whole append; the scheduler exposes it
            # for exactly this purpose.
            with scheduler.lock, open(
                CSV_DATASET_PATH, "a", newline="", encoding="utf-8"
            ) as f:
                writer = csv.writer(f, dialect="unix")
                if not wrote_header:
                    write_header(writer)
                writer.writerows(records)

        st.session_state["report_submitted"] = True
        st.rerun()