Yu Chen committed on
Commit
0641d27
·
1 Parent(s): ea3a98c

custom csv upload logic

Browse files
chatkit/backend/app/file_processor.py CHANGED
@@ -26,7 +26,9 @@ async def process_uploaded_files(
26
  ) -> dict:
27
  """Parse uploaded files and persist the structured result.
28
 
29
- Returns the parsed data dict with a `_meta` entry describing which parser ran.
 
 
30
  """
31
  parser_files: list[ParserFile] = []
32
  for file in files:
@@ -40,14 +42,49 @@ async def process_uploaded_files(
40
  if not parser_files:
41
  raise ValueError("No files uploaded")
42
 
 
 
 
 
 
43
  result = await _run_with_fallback(parser_files, data_type, description, model, parser)
 
 
44
 
45
- await delete_parsed_data(session_id, data_type)
46
 
47
- source_files = [f.filename for f in parser_files]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  raw_text = json.dumps(
49
  {
50
- "source_files": source_files,
51
  "parser": result.parser_name,
52
  "notes": list(result.notes),
53
  },
@@ -61,6 +98,8 @@ async def process_uploaded_files(
61
  result.data,
62
  )
63
 
 
 
64
  return {
65
  **result.data,
66
  "_meta": {
 
26
  ) -> dict:
27
  """Parse uploaded files and persist the structured result.
28
 
29
+ `data_type='answers'` triggers a dual extraction: the same files are parsed
30
+ twice (once for student_answers, once for teacher_answers) and both rows are
31
+ stored. The response merges both shapes under their respective keys.
32
  """
33
  parser_files: list[ParserFile] = []
34
  for file in files:
 
42
  if not parser_files:
43
  raise ValueError("No files uploaded")
44
 
45
+ if data_type == "answers":
46
+ return await _process_combined_answers(
47
+ parser_files, session_id, description, model, parser
48
+ )
49
+
50
  result = await _run_with_fallback(parser_files, data_type, description, model, parser)
51
+ await _persist(session_id, data_type, parser_files, result)
52
+ return _response_payload(result)
53
 
 
54
 
55
+ async def _process_combined_answers(
56
+ parser_files: list[ParserFile],
57
+ session_id: int,
58
+ description: str,
59
+ model: str,
60
+ parser: str,
61
+ ) -> dict:
62
+ student_result = await _run_with_fallback(
63
+ parser_files, "student_answers", description, model, parser
64
+ )
65
+ teacher_result = await _run_with_fallback(
66
+ parser_files, "teacher_answers", description, model, parser
67
+ )
68
+
69
+ await _persist(session_id, "student_answers", parser_files, student_result)
70
+ await _persist(session_id, "teacher_answers", parser_files, teacher_result)
71
+
72
+ return {
73
+ "student_answers": _response_payload(student_result),
74
+ "teacher_answers": _response_payload(teacher_result),
75
+ }
76
+
77
+
78
+ async def _persist(
79
+ session_id: int,
80
+ data_type: str,
81
+ parser_files: list[ParserFile],
82
+ result: ParserResult,
83
+ ) -> None:
84
+ await delete_parsed_data(session_id, data_type)
85
  raw_text = json.dumps(
86
  {
87
+ "source_files": [f.filename for f in parser_files],
88
  "parser": result.parser_name,
89
  "notes": list(result.notes),
90
  },
 
98
  result.data,
99
  )
100
 
101
+
102
+ def _response_payload(result: ParserResult) -> dict:
103
  return {
104
  **result.data,
105
  "_meta": {
chatkit/backend/app/main.py CHANGED
@@ -147,8 +147,11 @@ async def api_upload_files(
147
  if not session or session["user_id"] != user["id"]:
148
  raise HTTPException(status_code=404, detail="Session not found")
149
 
150
- if data_type not in ("questions", "student_answers", "teacher_answers"):
151
- raise HTTPException(status_code=400, detail="data_type must be 'questions', 'student_answers', or 'teacher_answers'")
 
 
 
152
 
153
  try:
154
  structured = await process_uploaded_files(
 
147
  if not session or session["user_id"] != user["id"]:
148
  raise HTTPException(status_code=404, detail="Session not found")
149
 
150
+ if data_type not in ("questions", "student_answers", "teacher_answers", "answers"):
151
+ raise HTTPException(
152
+ status_code=400,
153
+ detail="data_type must be 'questions', 'answers', 'student_answers', or 'teacher_answers'",
154
+ )
155
 
156
  try:
157
  structured = await process_uploaded_files(
chatkit/backend/app/parsers/__init__.py CHANGED
@@ -57,12 +57,16 @@ def pick_auto(file_bytes: bytes, filename: str, data_type: str) -> AnswerSheetPa
57
  if data_type == "questions":
58
  return _REGISTRY[LLMVisionParser.name]
59
 
 
 
 
 
60
  if get_extension(filename) == ".csv":
61
  return _REGISTRY[StrictCsvParser.name]
62
 
63
  for parser_name in (PyMuPDFTablesParser.name, PdfPlumberTablesParser.name):
64
  parser = _REGISTRY[parser_name]
65
- if parser.can_handle(file_bytes, filename, data_type):
66
  return parser
67
  return _REGISTRY[LLMVisionParser.name]
68
 
 
57
  if data_type == "questions":
58
  return _REGISTRY[LLMVisionParser.name]
59
 
60
+ # The "answers" combined zone runs the parser twice (once per subtype);
61
+ # treat it the same as student_answers for routing purposes.
62
+ routing_data_type = "student_answers" if data_type == "answers" else data_type
63
+
64
  if get_extension(filename) == ".csv":
65
  return _REGISTRY[StrictCsvParser.name]
66
 
67
  for parser_name in (PyMuPDFTablesParser.name, PdfPlumberTablesParser.name):
68
  parser = _REGISTRY[parser_name]
69
+ if parser.can_handle(file_bytes, filename, routing_data_type):
70
  return parser
71
  return _REGISTRY[LLMVisionParser.name]
72
 
chatkit/backend/app/parsers/csv_strict.py CHANGED
@@ -4,26 +4,25 @@
4
 
5
  A single CSV file represents the full answer table:
6
 
7
- - Row 0: the answer key. First cell is the name marker (e.g. `正確解答` /
 
 
8
  `標準答案` / `KEY`). Remaining cells are the correct letters per question.
9
- - Row 1..N: one row per student. First cell is the student's name. Remaining
10
  cells are the student's answer per question (single letter A-Z, `=` for
11
  correct, or blank for "did not answer").
12
 
13
- There is no separate header row. Column 0 is always the name column;
14
- remaining columns are positional Q1..QN. Other column-header text is not
15
- required and is ignored if present.
16
 
17
  ## Example
18
 
19
  ```
 
20
  正確解答,B,A,A,C,D
21
  梁祐邦,A,=,C,C,D
22
  田瑜婕,=,A,C,B,D
23
  ```
24
-
25
- When this file is uploaded to the `student_answers` zone, only rows 1..N are
26
- extracted. When uploaded to the `teacher_answers` zone, only row 0 is used.
27
  """
28
 
29
  from __future__ import annotations
@@ -44,7 +43,8 @@ class StrictCsvParser:
44
  display_name = "CSV (strict format)"
45
  description = (
46
  "Direct upload of an already-tabulated answer sheet. "
47
- "Row 0 = answer key (first cell '正確解答'), row 1+ = students. "
 
48
  "Column 0 = name; remaining columns are positional Q1..QN. "
49
  "Cells: A-Z, '=' for correct, or blank."
50
  )
@@ -137,10 +137,10 @@ def _parse_students(files: list[ParserFile], parser_name: str) -> ParserResult:
137
  rows = _read_rows(text)
138
  if not rows:
139
  raise ValueError(f"{f.filename}: CSV is empty.")
140
- if len(rows) < 2:
141
  raise ValueError(
142
- f"{f.filename}: expected the answer key on row 1 plus at least one "
143
- f"student row, got {len(rows)} row(s)."
144
  )
145
 
146
  n_cols = _column_count(rows)
@@ -150,17 +150,16 @@ def _parse_students(files: list[ParserFile], parser_name: str) -> ParserResult:
150
  )
151
  n_questions = n_cols - 1
152
 
153
- # Skip the answer-key row (typically row 0). If the first row's name
154
- # cell does not look like a key marker, warn but still skip it so the
155
- # user's positional contract is respected.
156
- first_name = rows[0][0] if rows[0] else ""
157
- if not _is_key_row(first_name):
158
  notes.append(
159
- f"{f.filename}: row 1 first column is '{first_name}', expected "
160
  f"'正確解答' / '標準答案' — treating it as the key anyway."
161
  )
162
 
163
- for idx, row in enumerate(rows[1:], start=2):
164
  padded = _pad_row(row, n_cols)
165
  name = padded[0]
166
  if not name:
@@ -201,6 +200,11 @@ def _parse_teacher(files: list[ParserFile], parser_name: str) -> ParserResult:
201
  rows = _read_rows(text)
202
  if not rows:
203
  raise ValueError(f"{f.filename}: CSV is empty.")
 
 
 
 
 
204
 
205
  n_cols = _column_count(rows)
206
  if n_cols < 2:
@@ -209,13 +213,14 @@ def _parse_teacher(files: list[ParserFile], parser_name: str) -> ParserResult:
209
  )
210
  n_questions = n_cols - 1
211
 
212
- key_row = _pad_row(rows[0], n_cols)
 
213
  notes: list[str] = list(notes_prefix)
214
 
215
  first_name = key_row[0]
216
  if not _is_key_row(first_name):
217
  notes.append(
218
- f"{f.filename}: row 1 first column is '{first_name}', expected "
219
  f"'正確解答' / '標準答案' — treating it as the key anyway."
220
  )
221
 
 
4
 
5
  A single CSV file represents the full answer table:
6
 
7
+ - Row 0: header row (column titles like `Q1, Q2, ...` — content is ignored,
8
+ only used to determine column count).
9
+ - Row 1: the answer key. First cell is the name marker (e.g. `正確解答` /
10
  `標準答案` / `KEY`). Remaining cells are the correct letters per question.
11
+ - Row 2..N: one row per student. First cell is the student's name. Remaining
12
  cells are the student's answer per question (single letter A-Z, `=` for
13
  correct, or blank for "did not answer").
14
 
15
+ Column 0 is always the name column; remaining columns are positional
16
+ Q1..QN. Row 0 is always treated as the header row; its text is not validated and is ignored.
 
17
 
18
  ## Example
19
 
20
  ```
21
+ ,Q1,Q2,Q3,Q4,Q5
22
  正確解答,B,A,A,C,D
23
  梁祐邦,A,=,C,C,D
24
  田瑜婕,=,A,C,B,D
25
  ```
 
 
 
26
  """
27
 
28
  from __future__ import annotations
 
43
  display_name = "CSV (strict format)"
44
  description = (
45
  "Direct upload of an already-tabulated answer sheet. "
46
+ "Row 0 = header (Q1, Q2, ... — ignored), row 1 = answer key "
47
+ "(first cell '正確解答'), row 2+ = students. "
48
  "Column 0 = name; remaining columns are positional Q1..QN. "
49
  "Cells: A-Z, '=' for correct, or blank."
50
  )
 
137
  rows = _read_rows(text)
138
  if not rows:
139
  raise ValueError(f"{f.filename}: CSV is empty.")
140
+ if len(rows) < 3:
141
  raise ValueError(
142
+ f"{f.filename}: expected a header row, an answer-key row, and at "
143
+ f"least one student row (got {len(rows)} row(s))."
144
  )
145
 
146
  n_cols = _column_count(rows)
 
150
  )
151
  n_questions = n_cols - 1
152
 
153
+ # rows[0] is the column header (Q1..QN). It's ignored beyond column count.
154
+ # rows[1] should be the answer key — warn if the name cell isn't a marker.
155
+ key_name = rows[1][0] if rows[1] else ""
156
+ if not _is_key_row(key_name):
 
157
  notes.append(
158
+ f"{f.filename}: row 2 first column is '{key_name}', expected "
159
  f"'正確解答' / '標準答案' — treating it as the key anyway."
160
  )
161
 
162
+ for row in rows[2:]:
163
  padded = _pad_row(row, n_cols)
164
  name = padded[0]
165
  if not name:
 
200
  rows = _read_rows(text)
201
  if not rows:
202
  raise ValueError(f"{f.filename}: CSV is empty.")
203
+ if len(rows) < 2:
204
+ raise ValueError(
205
+ f"{f.filename}: expected a header row plus an answer-key row "
206
+ f"(got {len(rows)} row(s))."
207
+ )
208
 
209
  n_cols = _column_count(rows)
210
  if n_cols < 2:
 
213
  )
214
  n_questions = n_cols - 1
215
 
216
+ # Row 0 is the column header; row 1 is the answer key.
217
+ key_row = _pad_row(rows[1], n_cols)
218
  notes: list[str] = list(notes_prefix)
219
 
220
  first_name = key_row[0]
221
  if not _is_key_row(first_name):
222
  notes.append(
223
+ f"{f.filename}: row 2 first column is '{first_name}', expected "
224
  f"'正確解答' / '標準答案' — treating it as the key anyway."
225
  )
226
 
chatkit/backend/tests/test_csv_strict.py CHANGED
@@ -23,7 +23,7 @@ def test_can_handle_csv_only_for_answer_zones():
23
 
24
  @pytest.mark.unit
25
  async def test_student_answers_basic():
26
- csv_text = "正確解答,B,A,A,C,D\n梁祐邦,A,=,C,C,D\n田瑜婕,=,A,C,B,D\n"
27
  p = get_parser("csv_strict")
28
  result = await p.parse([_file(csv_text)], "student_answers")
29
  students = result.data["students"]
@@ -35,7 +35,7 @@ async def test_student_answers_basic():
35
 
36
  @pytest.mark.unit
37
  async def test_student_answers_blank_cell_becomes_none():
38
- csv_text = "正確解答,B,A,A\n梁祐邦,A,,C\n"
39
  p = get_parser("csv_strict")
40
  result = await p.parse([_file(csv_text)], "student_answers")
41
  answers = [a["answer"] for a in result.data["students"][0]["answers"]]
@@ -44,7 +44,7 @@ async def test_student_answers_blank_cell_becomes_none():
44
 
45
  @pytest.mark.unit
46
  async def test_student_answers_legacy_dash_becomes_equals():
47
- csv_text = "正確解答,B,A,A\n梁祐邦,-,A,A\n"
48
  p = get_parser("csv_strict")
49
  result = await p.parse([_file(csv_text)], "student_answers")
50
  assert result.data["students"][0]["answers"][0]["answer"] == "="
@@ -52,7 +52,7 @@ async def test_student_answers_legacy_dash_becomes_equals():
52
 
53
  @pytest.mark.unit
54
  async def test_student_answers_skips_empty_name_rows():
55
- csv_text = "正確解答,B,A\n梁祐邦,A,A\n,B,B\n田瑜婕,B,A\n"
56
  p = get_parser("csv_strict")
57
  result = await p.parse([_file(csv_text)], "student_answers")
58
  assert [s["name"] for s in result.data["students"]] == ["梁祐邦", "田瑜婕"]
@@ -60,11 +60,11 @@ async def test_student_answers_skips_empty_name_rows():
60
 
61
 
62
  @pytest.mark.unit
63
- async def test_student_answers_warns_when_first_name_isnt_marker():
64
- csv_text = "梁祐邦,A,B\n田瑜婕,B,A\n"
65
  p = get_parser("csv_strict")
66
  result = await p.parse([_file(csv_text)], "student_answers")
67
- # First row is treated as the key — student count is 1
68
  assert len(result.data["students"]) == 1
69
  assert result.data["students"][0]["name"] == "田瑜婕"
70
  assert any("treating it as the key" in n for n in result.notes)
@@ -72,7 +72,7 @@ async def test_student_answers_warns_when_first_name_isnt_marker():
72
 
73
  @pytest.mark.unit
74
  async def test_student_answers_rejects_too_few_rows():
75
- csv_text = "正確解答,A,B,C\n"
76
  p = get_parser("csv_strict")
77
  with pytest.raises(ValueError, match="at least one student"):
78
  await p.parse([_file(csv_text)], "student_answers")
@@ -80,7 +80,7 @@ async def test_student_answers_rejects_too_few_rows():
80
 
81
  @pytest.mark.unit
82
  async def test_student_answers_rejects_single_column():
83
- csv_text = "正確解答\n梁祐邦\n"
84
  p = get_parser("csv_strict")
85
  with pytest.raises(ValueError, match="at least 2 columns"):
86
  await p.parse([_file(csv_text)], "student_answers")
@@ -88,7 +88,7 @@ async def test_student_answers_rejects_single_column():
88
 
89
  @pytest.mark.unit
90
  async def test_student_answers_pads_short_rows():
91
- csv_text = "正確解答,B,A,A,C\n梁祐邦,A,=\n"
92
  p = get_parser("csv_strict")
93
  result = await p.parse([_file(csv_text)], "student_answers")
94
  answers = [a["answer"] for a in result.data["students"][0]["answers"]]
@@ -96,10 +96,9 @@ async def test_student_answers_pads_short_rows():
96
 
97
 
98
  @pytest.mark.unit
99
- async def test_student_answers_arbitrary_other_column_text_ignored():
100
  """Header text in row 0 cells beyond col 0 doesn't matter — col headers
101
- are not validated, the file is positional."""
102
- csv_text = "正確解答,B,A,A\n梁祐邦,A,=,C\n"
103
  p = get_parser("csv_strict")
104
  result = await p.parse([_file(csv_text)], "student_answers")
105
  assert len(result.data["students"]) == 1
@@ -107,7 +106,7 @@ async def test_student_answers_arbitrary_other_column_text_ignored():
107
 
108
  @pytest.mark.unit
109
  async def test_teacher_answers_basic():
110
- csv_text = "正確解答,B,A,A,C,D\n梁祐邦,A,=,C,C,D\n"
111
  p = get_parser("csv_strict")
112
  result = await p.parse([_file(csv_text)], "teacher_answers")
113
  answers = result.data["answers"]
@@ -117,7 +116,7 @@ async def test_teacher_answers_basic():
117
 
118
  @pytest.mark.unit
119
  async def test_teacher_answers_rejects_equals_in_key():
120
- csv_text = "正確解答,B,=,A\n梁祐邦,A,A,A\n"
121
  p = get_parser("csv_strict")
122
  with pytest.raises(ValueError, match="must be a concrete letter"):
123
  await p.parse([_file(csv_text)], "teacher_answers")
@@ -125,7 +124,7 @@ async def test_teacher_answers_rejects_equals_in_key():
125
 
126
  @pytest.mark.unit
127
  async def test_teacher_answers_rejects_blank_cell_in_key():
128
- csv_text = "正確解答,B,,A\n梁祐邦,A,A,A\n"
129
  p = get_parser("csv_strict")
130
  with pytest.raises(ValueError, match="empty or not a letter"):
131
  await p.parse([_file(csv_text)], "teacher_answers")
@@ -133,7 +132,7 @@ async def test_teacher_answers_rejects_blank_cell_in_key():
133
 
134
  @pytest.mark.unit
135
  async def test_utf8_bom_accepted():
136
- csv_text = "正確解答,B,A\n梁祐邦,A,A\n"
137
  file = ParserFile(filename="answers.csv", content=("" + csv_text).encode("utf-8"))
138
  p = get_parser("csv_strict")
139
  result = await p.parse([file], "student_answers")
@@ -142,7 +141,7 @@ async def test_utf8_bom_accepted():
142
 
143
  @pytest.mark.unit
144
  async def test_non_utf8_raises_clear_error():
145
- csv_text = "正確解答,B,A\n梁祐邦,A,A\n"
146
  file = ParserFile(filename="answers.csv", content=csv_text.encode("big5"))
147
  p = get_parser("csv_strict")
148
  with pytest.raises(ValueError, match="UTF-8"):
@@ -165,7 +164,7 @@ def test_pick_auto_questions_never_uses_csv():
165
 
166
  @pytest.mark.unit
167
  async def test_lowercase_marker_accepted():
168
- csv_text = "key,B,A\n梁祐邦,A,A\n"
169
  p = get_parser("csv_strict")
170
  result = await p.parse([_file(csv_text)], "student_answers")
171
  # Should NOT warn — "key" is in the marker set
@@ -175,8 +174,69 @@ async def test_lowercase_marker_accepted():
175
 
176
  @pytest.mark.unit
177
  async def test_excel_trailing_empty_columns_ignored():
178
- csv_text = "正確解答,B,A,A,,\n梁祐邦,A,=,C,,\n"
179
  p = get_parser("csv_strict")
180
  result = await p.parse([_file(csv_text)], "student_answers")
181
  answers = [a["answer"] for a in result.data["students"][0]["answers"]]
182
  assert answers == ["A", "=", "C"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  @pytest.mark.unit
25
  async def test_student_answers_basic():
26
+ csv_text = ",Q1,Q2,Q3,Q4,Q5\n正確解答,B,A,A,C,D\n梁祐邦,A,=,C,C,D\n田瑜婕,=,A,C,B,D\n"
27
  p = get_parser("csv_strict")
28
  result = await p.parse([_file(csv_text)], "student_answers")
29
  students = result.data["students"]
 
35
 
36
  @pytest.mark.unit
37
  async def test_student_answers_blank_cell_becomes_none():
38
+ csv_text = ",Q1,Q2,Q3\n正確解答,B,A,A\n梁祐邦,A,,C\n"
39
  p = get_parser("csv_strict")
40
  result = await p.parse([_file(csv_text)], "student_answers")
41
  answers = [a["answer"] for a in result.data["students"][0]["answers"]]
 
44
 
45
  @pytest.mark.unit
46
  async def test_student_answers_legacy_dash_becomes_equals():
47
+ csv_text = ",Q1,Q2,Q3\n正確解答,B,A,A\n梁祐邦,-,A,A\n"
48
  p = get_parser("csv_strict")
49
  result = await p.parse([_file(csv_text)], "student_answers")
50
  assert result.data["students"][0]["answers"][0]["answer"] == "="
 
52
 
53
  @pytest.mark.unit
54
  async def test_student_answers_skips_empty_name_rows():
55
+ csv_text = ",Q1,Q2\n正確解答,B,A\n梁祐邦,A,A\n,B,B\n田瑜婕,B,A\n"
56
  p = get_parser("csv_strict")
57
  result = await p.parse([_file(csv_text)], "student_answers")
58
  assert [s["name"] for s in result.data["students"]] == ["梁祐邦", "田瑜婕"]
 
60
 
61
 
62
  @pytest.mark.unit
63
+ async def test_student_answers_warns_when_key_row_isnt_marker():
64
+ csv_text = ",Q1,Q2\n梁祐邦,A,B\n田瑜婕,B,A\n"
65
  p = get_parser("csv_strict")
66
  result = await p.parse([_file(csv_text)], "student_answers")
67
+ # Row 1 (after header) treated as the key even when name isn't a marker
68
  assert len(result.data["students"]) == 1
69
  assert result.data["students"][0]["name"] == "田瑜婕"
70
  assert any("treating it as the key" in n for n in result.notes)
 
72
 
73
  @pytest.mark.unit
74
  async def test_student_answers_rejects_too_few_rows():
75
+ csv_text = ",Q1,Q2,Q3\n正確解答,A,B,C\n"
76
  p = get_parser("csv_strict")
77
  with pytest.raises(ValueError, match="at least one student"):
78
  await p.parse([_file(csv_text)], "student_answers")
 
80
 
81
  @pytest.mark.unit
82
  async def test_student_answers_rejects_single_column():
83
+ csv_text = "header\n正確解答\n梁祐邦\n"
84
  p = get_parser("csv_strict")
85
  with pytest.raises(ValueError, match="at least 2 columns"):
86
  await p.parse([_file(csv_text)], "student_answers")
 
88
 
89
  @pytest.mark.unit
90
  async def test_student_answers_pads_short_rows():
91
+ csv_text = ",Q1,Q2,Q3,Q4\n正確解答,B,A,A,C\n梁祐邦,A,=\n"
92
  p = get_parser("csv_strict")
93
  result = await p.parse([_file(csv_text)], "student_answers")
94
  answers = [a["answer"] for a in result.data["students"][0]["answers"]]
 
96
 
97
 
98
  @pytest.mark.unit
99
+ async def test_student_answers_arbitrary_header_text_ignored():
100
+ """Row 0 header text is not validated — only column count matters."""
101
+ csv_text = "name,foo,bar,baz\n正確解答,B,A,A\n梁祐邦,A,=,C\n"
 
102
  p = get_parser("csv_strict")
103
  result = await p.parse([_file(csv_text)], "student_answers")
104
  assert len(result.data["students"]) == 1
 
106
 
107
  @pytest.mark.unit
108
  async def test_teacher_answers_basic():
109
+ csv_text = ",Q1,Q2,Q3,Q4,Q5\n正確解答,B,A,A,C,D\n梁祐邦,A,=,C,C,D\n"
110
  p = get_parser("csv_strict")
111
  result = await p.parse([_file(csv_text)], "teacher_answers")
112
  answers = result.data["answers"]
 
116
 
117
  @pytest.mark.unit
118
  async def test_teacher_answers_rejects_equals_in_key():
119
+ csv_text = ",Q1,Q2,Q3\n正確解答,B,=,A\n梁祐邦,A,A,A\n"
120
  p = get_parser("csv_strict")
121
  with pytest.raises(ValueError, match="must be a concrete letter"):
122
  await p.parse([_file(csv_text)], "teacher_answers")
 
124
 
125
  @pytest.mark.unit
126
  async def test_teacher_answers_rejects_blank_cell_in_key():
127
+ csv_text = ",Q1,Q2,Q3\n正確解答,B,,A\n梁祐邦,A,A,A\n"
128
  p = get_parser("csv_strict")
129
  with pytest.raises(ValueError, match="empty or not a letter"):
130
  await p.parse([_file(csv_text)], "teacher_answers")
 
132
 
133
  @pytest.mark.unit
134
  async def test_utf8_bom_accepted():
135
+ csv_text = ",Q1,Q2\n正確解答,B,A\n梁祐邦,A,A\n"
136
  file = ParserFile(filename="answers.csv", content=("" + csv_text).encode("utf-8"))
137
  p = get_parser("csv_strict")
138
  result = await p.parse([file], "student_answers")
 
141
 
142
  @pytest.mark.unit
143
  async def test_non_utf8_raises_clear_error():
144
+ csv_text = ",Q1,Q2\n正確解答,B,A\n梁祐邦,A,A\n"
145
  file = ParserFile(filename="answers.csv", content=csv_text.encode("big5"))
146
  p = get_parser("csv_strict")
147
  with pytest.raises(ValueError, match="UTF-8"):
 
164
 
165
  @pytest.mark.unit
166
  async def test_lowercase_marker_accepted():
167
+ csv_text = ",Q1,Q2\nkey,B,A\n梁祐邦,A,A\n"
168
  p = get_parser("csv_strict")
169
  result = await p.parse([_file(csv_text)], "student_answers")
170
  # Should NOT warn — "key" is in the marker set
 
174
 
175
  @pytest.mark.unit
176
  async def test_excel_trailing_empty_columns_ignored():
177
+ csv_text = ",Q1,Q2,Q3,,\n正確解答,B,A,A,,\n梁祐邦,A,=,C,,\n"
178
  p = get_parser("csv_strict")
179
  result = await p.parse([_file(csv_text)], "student_answers")
180
  answers = [a["answer"] for a in result.data["students"][0]["answers"]]
181
  assert answers == ["A", "=", "C"]
182
+
183
+
184
+ # ---------------------------------------------------------------------------
185
+ # Combined "answers" data_type via process_uploaded_files
186
+ # ---------------------------------------------------------------------------
187
+
188
+
189
+ class _FakeUpload:
190
+ """Minimal stand-in for FastAPI's UploadFile."""
191
+
192
+ def __init__(self, filename: str, content: bytes) -> None:
193
+ self.filename = filename
194
+ self._content = content
195
+
196
+ async def read(self) -> bytes:
197
+ return self._content
198
+
199
+
200
+ @pytest.mark.unit
201
+ async def test_combined_answers_persists_both_subtypes(tmp_path, monkeypatch):
202
+ """`data_type='answers'` runs the parser twice and stores both rows."""
203
+ from app import database
204
+ from app.file_processor import process_uploaded_files
205
+
206
+ # Point the DB at a fresh file
207
+ db_file = tmp_path / "test.db"
208
+ monkeypatch.setattr(database, "DATABASE_PATH", db_file)
209
+ monkeypatch.setattr(
210
+ database.get_settings.__wrapped__, "__defaults__", None, raising=False
211
+ )
212
+ # Ensure config returns no override
213
+ from app import config as config_module
214
+
215
+ cached = config_module.get_settings()
216
+ monkeypatch.setattr(cached, "database_path", "", raising=False)
217
+
218
+ await database.init_database()
219
+ user_id = await database.create_user("t@example.com", "h", "T")
220
+ session_id = await database.create_session(user_id, "S")
221
+
222
+ csv_text = ",Q1,Q2,Q3,Q4,Q5\n正確解答,B,A,A,C,D\n梁祐邦,A,=,C,C,D\n田瑜婕,=,A,C,B,D\n"
223
+ upload = _FakeUpload("answers.csv", csv_text.encode("utf-8"))
224
+
225
+ response = await process_uploaded_files(
226
+ [upload],
227
+ "answers",
228
+ session_id,
229
+ parser="csv_strict",
230
+ )
231
+
232
+ assert "student_answers" in response
233
+ assert "teacher_answers" in response
234
+ assert len(response["student_answers"]["students"]) == 2
235
+ assert len(response["teacher_answers"]["answers"]) == 5
236
+
237
+ rows = await database.get_parsed_data(session_id)
238
+ by_type = {r["data_type"]: r for r in rows}
239
+ assert "student_answers" in by_type
240
+ assert "teacher_answers" in by_type
241
+ assert len(by_type["student_answers"]["structured_data"]["students"]) == 2
242
+ assert len(by_type["teacher_answers"]["structured_data"]["answers"]) == 5
chatkit/frontend/src/components/step1/FileUploadPanel.tsx CHANGED
@@ -16,7 +16,7 @@ interface FileUploadPanelProps {
16
  }
17
 
18
  interface UploadZone {
19
- type: "questions" | "student_answers" | "teacher_answers";
20
  label: string;
21
  description: string;
22
  placeholder: string;
@@ -32,17 +32,11 @@ const zones: UploadZone[] = [
32
  icon: "📝",
33
  },
34
  {
35
- type: "student_answers",
36
- label: "學生答案",
37
- description: "上傳學生作答資料任意格式)",
38
- placeholder: "例如:35 位學生的答案卷,手寫掃描",
39
- icon: "👨‍🎓",
40
- },
41
- {
42
- type: "teacher_answers",
43
- label: "標準答案",
44
- description: "上傳教師答案/解答(任意格式)",
45
- placeholder: "例如:教師提供的標準答案與解題步驟",
46
  icon: "✅",
47
  },
48
  ];
@@ -55,13 +49,11 @@ export function FileUploadPanel({
55
  }: FileUploadPanelProps) {
56
  const [files, setFiles] = useState<Record<string, File[]>>({
57
  questions: [],
58
- student_answers: [],
59
- teacher_answers: [],
60
  });
61
  const [descriptions, setDescriptions] = useState<Record<string, string>>({
62
  questions: "",
63
- student_answers: "",
64
- teacher_answers: "",
65
  });
66
  const [analyzing, setAnalyzing] = useState<Record<string, boolean>>({});
67
  const [errors, setErrors] = useState<Record<string, string>>({});
@@ -69,8 +61,7 @@ export function FileUploadPanel({
69
  const [parsers, setParsers] = useState<ParserOption[]>([]);
70
  const [parserByZone, setParserByZone] = useState<Record<string, string>>({
71
  questions: "auto",
72
- student_answers: "auto",
73
- teacher_answers: "auto",
74
  });
75
  const fileInputRefs = useRef<Record<string, HTMLInputElement | null>>({});
76
 
@@ -108,7 +99,17 @@ export function FileUploadPanel({
108
  formData.append("files", f);
109
  }
110
  const res = await apiUpload<{ data: unknown }>(`/api/sessions/${sessionId}/upload`, formData);
111
- onParsedDataUpdate(type, res.data);
 
 
 
 
 
 
 
 
 
 
112
  } catch (err: unknown) {
113
  setErrors((prev) => ({
114
  ...prev,
@@ -135,7 +136,7 @@ export function FileUploadPanel({
135
  </p>
136
 
137
  {/* Upload zones */}
138
- <div className="grid grid-cols-1 md:grid-cols-3 gap-4">
139
  {zones.map((zone) => {
140
  const zoneData = parsedData[zone.type] as Record<string, unknown> | undefined;
141
  const isAnalyzing = analyzing[zone.type];
@@ -248,10 +249,14 @@ export function FileUploadPanel({
248
  <p className="text-xs text-[var(--color-success)] font-semibold mb-1">
249
  ✓ 解析完成
250
  {(() => {
251
- const meta = (zoneData as { _meta?: { parser?: string } })._meta;
252
- return meta?.parser ? (
 
 
 
 
253
  <span className="ml-2 text-[var(--color-text-muted)] font-normal">
254
- (by {meta.parser})
255
  </span>
256
  ) : null;
257
  })()}
@@ -303,21 +308,14 @@ function ZoneSummary({ type, data }: { type: string; data: Record<string, unknow
303
  </p>
304
  );
305
  }
306
- if (type === "student_answers") {
307
- const students = (data as { students?: unknown[] }).students;
308
- if (!students) return null;
309
- return (
310
- <p className="text-xs text-[var(--color-text-muted)]">
311
- 共 {students.length} 位學生
312
- </p>
313
- );
314
- }
315
- if (type === "teacher_answers") {
316
- const answers = (data as { answers?: unknown[] }).answers;
317
- if (!answers) return null;
318
  return (
319
  <p className="text-xs text-[var(--color-text-muted)]">
320
- 共 {answers.length} 答案
321
  </p>
322
  );
323
  }
 
16
  }
17
 
18
  interface UploadZone {
19
+ type: "questions" | "answers";
20
  label: string;
21
  description: string;
22
  placeholder: string;
 
32
  icon: "📝",
33
  },
34
  {
35
+ type: "answers",
36
+ label: "答案",
37
+ description: "上傳作答一覽表PDF / CSV / 圖片)",
38
+ placeholder:
39
+ "例如:A 卷的考生作答一覽表(CSV 第 1 列為題號標題,第 2 列為標準答案,第 3 列起為學生)",
 
 
 
 
 
 
40
  icon: "✅",
41
  },
42
  ];
 
49
  }: FileUploadPanelProps) {
50
  const [files, setFiles] = useState<Record<string, File[]>>({
51
  questions: [],
52
+ answers: [],
 
53
  });
54
  const [descriptions, setDescriptions] = useState<Record<string, string>>({
55
  questions: "",
56
+ answers: "",
 
57
  });
58
  const [analyzing, setAnalyzing] = useState<Record<string, boolean>>({});
59
  const [errors, setErrors] = useState<Record<string, string>>({});
 
61
  const [parsers, setParsers] = useState<ParserOption[]>([]);
62
  const [parserByZone, setParserByZone] = useState<Record<string, string>>({
63
  questions: "auto",
64
+ answers: "auto",
 
65
  });
66
  const fileInputRefs = useRef<Record<string, HTMLInputElement | null>>({});
67
 
 
99
  formData.append("files", f);
100
  }
101
  const res = await apiUpload<{ data: unknown }>(`/api/sessions/${sessionId}/upload`, formData);
102
+ if (type === "answers") {
103
+ const data = res.data as {
104
+ student_answers?: unknown;
105
+ teacher_answers?: unknown;
106
+ };
107
+ if (data.student_answers) onParsedDataUpdate("student_answers", data.student_answers);
108
+ if (data.teacher_answers) onParsedDataUpdate("teacher_answers", data.teacher_answers);
109
+ onParsedDataUpdate("answers", res.data);
110
+ } else {
111
+ onParsedDataUpdate(type, res.data);
112
+ }
113
  } catch (err: unknown) {
114
  setErrors((prev) => ({
115
  ...prev,
 
136
  </p>
137
 
138
  {/* Upload zones */}
139
+ <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
140
  {zones.map((zone) => {
141
  const zoneData = parsedData[zone.type] as Record<string, unknown> | undefined;
142
  const isAnalyzing = analyzing[zone.type];
 
249
  <p className="text-xs text-[var(--color-success)] font-semibold mb-1">
250
  ✓ 解析完成
251
  {(() => {
252
+ const directMeta = (zoneData as { _meta?: { parser?: string } })._meta;
253
+ const nestedMeta = (zoneData as {
254
+ student_answers?: { _meta?: { parser?: string } };
255
+ }).student_answers?._meta;
256
+ const parserName = directMeta?.parser ?? nestedMeta?.parser;
257
+ return parserName ? (
258
  <span className="ml-2 text-[var(--color-text-muted)] font-normal">
259
+ (by {parserName})
260
  </span>
261
  ) : null;
262
  })()}
 
308
  </p>
309
  );
310
  }
311
+ if (type === "answers") {
312
+ const sa = (data as { student_answers?: { students?: unknown[] } }).student_answers;
313
+ const ta = (data as { teacher_answers?: { answers?: unknown[] } }).teacher_answers;
314
+ const studentCount = sa?.students?.length ?? 0;
315
+ const keyCount = ta?.answers?.length ?? 0;
 
 
 
 
 
 
 
316
  return (
317
  <p className="text-xs text-[var(--color-text-muted)]">
318
+ 共 {studentCount} 位學生 / 標準答案 {keyCount} 題
319
  </p>
320
  );
321
  }
chatkit/run-local.sh CHANGED
@@ -9,11 +9,16 @@ cd "$SCRIPT_DIR"
9
  echo "🚀 Starting ClassLens locally..."
10
  echo ""
11
 
12
- # Check for .env file
13
- if [ ! -f "../.env" ]; then
 
 
 
 
14
  echo "⚠️ No .env file found. Please create one from env.example"
15
  exit 1
16
  fi
 
17
 
18
  # Function to cleanup on exit
19
  cleanup() {
@@ -37,7 +42,9 @@ source .venv/bin/activate
37
  pip install -q -e . > /dev/null 2>&1
38
 
39
  # Load environment variables
40
- export $(grep -v '^#' ../.env | xargs)
 
 
41
 
42
  # Start backend in background
43
  uvicorn app.main:app --host 127.0.0.1 --port 8000 --reload > /tmp/classlens-backend.log 2>&1 &
@@ -47,7 +54,7 @@ BACKEND_PID=$!
47
  sleep 2
48
 
49
  # Start frontend
50
- echo "🎨 Starting frontend on http://localhost:3000"
51
  cd ../frontend
52
 
53
  # Install frontend dependencies if needed
@@ -64,7 +71,7 @@ echo ""
64
  echo "✅ ClassLens is running!"
65
  echo ""
66
  echo " Backend: http://127.0.0.1:8000"
67
- echo " Frontend: http://localhost:3000"
68
  echo ""
69
  echo " Backend logs: tail -f /tmp/classlens-backend.log"
70
  echo " Frontend logs: tail -f /tmp/classlens-frontend.log"
 
9
  echo "🚀 Starting ClassLens locally..."
10
  echo ""
11
 
12
+ # Check for .env file (project root or chatkit/) — resolve to absolute path
13
+ if [ -f "$SCRIPT_DIR/../.env" ]; then
14
+ ENV_FILE="$SCRIPT_DIR/../.env"
15
+ elif [ -f "$SCRIPT_DIR/.env" ]; then
16
+ ENV_FILE="$SCRIPT_DIR/.env"
17
+ else
18
  echo "⚠️ No .env file found. Please create one from env.example"
19
  exit 1
20
  fi
21
+ echo "🔑 Using env file: $ENV_FILE"
22
 
23
  # Function to cleanup on exit
24
  cleanup() {
 
42
  pip install -q -e . > /dev/null 2>&1
43
 
44
  # Load environment variables
45
+ set -a
46
+ source "$ENV_FILE"
47
+ set +a
48
 
49
  # Start backend in background
50
  uvicorn app.main:app --host 127.0.0.1 --port 8000 --reload > /tmp/classlens-backend.log 2>&1 &
 
54
  sleep 2
55
 
56
  # Start frontend
57
+ echo "🎨 Starting frontend on http://localhost:3003"
58
  cd ../frontend
59
 
60
  # Install frontend dependencies if needed
 
71
  echo "✅ ClassLens is running!"
72
  echo ""
73
  echo " Backend: http://127.0.0.1:8000"
74
+ echo " Frontend: http://localhost:3003"
75
  echo ""
76
  echo " Backend logs: tail -f /tmp/classlens-backend.log"
77
  echo " Frontend logs: tail -f /tmp/classlens-frontend.log"