Spaces:

taboola-cz
/

ClassLens

Sleeping

App Files Files Community

Yu Chen committed 19 days ago

Commit

0641d27

1 Parent(s): ea3a98c

custom csv upload logic

Browse files

Files changed (7) hide show

chatkit/backend/app/file_processor.py +43 -4
chatkit/backend/app/main.py +5 -2
chatkit/backend/app/parsers/__init__.py +5 -1
chatkit/backend/app/parsers/csv_strict.py +26 -21
chatkit/backend/tests/test_csv_strict.py +81 -21
chatkit/frontend/src/components/step1/FileUploadPanel.tsx +34 -36
chatkit/run-local.sh +12 -5

chatkit/backend/app/file_processor.py CHANGED Viewed

@@ -26,7 +26,9 @@ async def process_uploaded_files(
 ) -> dict:
     """Parse uploaded files and persist the structured result.
-    Returns the parsed data dict with a `_meta` entry describing which parser ran.
     """
     parser_files: list[ParserFile] = []
     for file in files:
@@ -40,14 +42,49 @@ async def process_uploaded_files(
     if not parser_files:
         raise ValueError("No files uploaded")
     result = await _run_with_fallback(parser_files, data_type, description, model, parser)
-    await delete_parsed_data(session_id, data_type)
-    source_files = [f.filename for f in parser_files]
     raw_text = json.dumps(
         {
-            "source_files": source_files,
             "parser": result.parser_name,
             "notes": list(result.notes),
         },
@@ -61,6 +98,8 @@ async def process_uploaded_files(
         result.data,
     )
     return {
         **result.data,
         "_meta": {

 ) -> dict:
     """Parse uploaded files and persist the structured result.
+    `data_type='answers'` triggers a dual extraction: the same files are parsed
+    twice (once for student_answers, once for teacher_answers) and both rows are
+    stored. The response merges both shapes under their respective keys.
     """
     parser_files: list[ParserFile] = []
     for file in files:
     if not parser_files:
         raise ValueError("No files uploaded")
+    if data_type == "answers":
+        return await _process_combined_answers(
+            parser_files, session_id, description, model, parser
+        )
     result = await _run_with_fallback(parser_files, data_type, description, model, parser)
+    await _persist(session_id, data_type, parser_files, result)
+    return _response_payload(result)
+async def _process_combined_answers(
+    parser_files: list[ParserFile],
+    session_id: int,
+    description: str,
+    model: str,
+    parser: str,
+) -> dict:
+    student_result = await _run_with_fallback(
+        parser_files, "student_answers", description, model, parser
+    )
+    teacher_result = await _run_with_fallback(
+        parser_files, "teacher_answers", description, model, parser
+    )
+    await _persist(session_id, "student_answers", parser_files, student_result)
+    await _persist(session_id, "teacher_answers", parser_files, teacher_result)
+    return {
+        "student_answers": _response_payload(student_result),
+        "teacher_answers": _response_payload(teacher_result),
+    }
+async def _persist(
+    session_id: int,
+    data_type: str,
+    parser_files: list[ParserFile],
+    result: ParserResult,
+) -> None:
+    await delete_parsed_data(session_id, data_type)
     raw_text = json.dumps(
         {
+            "source_files": [f.filename for f in parser_files],
             "parser": result.parser_name,
             "notes": list(result.notes),
         },
         result.data,
     )
+def _response_payload(result: ParserResult) -> dict:
     return {
         **result.data,
         "_meta": {

chatkit/backend/app/main.py CHANGED Viewed

@@ -147,8 +147,11 @@ async def api_upload_files(
     if not session or session["user_id"] != user["id"]:
         raise HTTPException(status_code=404, detail="Session not found")
-    if data_type not in ("questions", "student_answers", "teacher_answers"):
-        raise HTTPException(status_code=400, detail="data_type must be 'questions', 'student_answers', or 'teacher_answers'")
     try:
         structured = await process_uploaded_files(

     if not session or session["user_id"] != user["id"]:
         raise HTTPException(status_code=404, detail="Session not found")
+    if data_type not in ("questions", "student_answers", "teacher_answers", "answers"):
+        raise HTTPException(
+            status_code=400,
+            detail="data_type must be 'questions', 'answers', 'student_answers', or 'teacher_answers'",
+        )
     try:
         structured = await process_uploaded_files(

chatkit/backend/app/parsers/__init__.py CHANGED Viewed

@@ -57,12 +57,16 @@ def pick_auto(file_bytes: bytes, filename: str, data_type: str) -> AnswerSheetPa
     if data_type == "questions":
         return _REGISTRY[LLMVisionParser.name]
     if get_extension(filename) == ".csv":
         return _REGISTRY[StrictCsvParser.name]
     for parser_name in (PyMuPDFTablesParser.name, PdfPlumberTablesParser.name):
         parser = _REGISTRY[parser_name]
-        if parser.can_handle(file_bytes, filename, data_type):
             return parser
     return _REGISTRY[LLMVisionParser.name]

     if data_type == "questions":
         return _REGISTRY[LLMVisionParser.name]
+    # The "answers" combined zone runs the parser twice (once per subtype);
+    # treat it the same as student_answers for routing purposes.
+    routing_data_type = "student_answers" if data_type == "answers" else data_type
     if get_extension(filename) == ".csv":
         return _REGISTRY[StrictCsvParser.name]
     for parser_name in (PyMuPDFTablesParser.name, PdfPlumberTablesParser.name):
         parser = _REGISTRY[parser_name]
+        if parser.can_handle(file_bytes, filename, routing_data_type):
             return parser
     return _REGISTRY[LLMVisionParser.name]

chatkit/backend/app/parsers/csv_strict.py CHANGED Viewed

@@ -4,26 +4,25 @@
 A single CSV file represents the full answer table:
-- Row 0: the answer key. First cell is the name marker (e.g. `正確解答` /
   `標準答案` / `KEY`). Remaining cells are the correct letters per question.
-- Row 1..N: one row per student. First cell is the student's name. Remaining
   cells are the student's answer per question (single letter A-Z, `=` for
   correct, or blank for "did not answer").
-There is no separate header row. Column 0 is always the name column;
-remaining columns are positional Q1..QN. Other column-header text is not
-required and is ignored if present.
 ## Example
 ```
 正確解答,B,A,A,C,D
 梁祐邦,A,=,C,C,D
 田瑜婕,=,A,C,B,D
 ```
-When this file is uploaded to the `student_answers` zone, only rows 1..N are
-extracted. When uploaded to the `teacher_answers` zone, only row 0 is used.
 """
 from __future__ import annotations
@@ -44,7 +43,8 @@ class StrictCsvParser:
     display_name = "CSV (strict format)"
     description = (
         "Direct upload of an already-tabulated answer sheet. "
-        "Row 0 = answer key (first cell '正確解答'), row 1+ = students. "
         "Column 0 = name; remaining columns are positional Q1..QN. "
         "Cells: A-Z, '=' for correct, or blank."
     )
@@ -137,10 +137,10 @@ def _parse_students(files: list[ParserFile], parser_name: str) -> ParserResult:
         rows = _read_rows(text)
         if not rows:
             raise ValueError(f"{f.filename}: CSV is empty.")
-        if len(rows) < 2:
             raise ValueError(
-                f"{f.filename}: expected the answer key on row 1 plus at least one "
-                f"student row, got {len(rows)} row(s)."
             )
         n_cols = _column_count(rows)
@@ -150,17 +150,16 @@ def _parse_students(files: list[ParserFile], parser_name: str) -> ParserResult:
             )
         n_questions = n_cols - 1
-        # Skip the answer-key row (typically row 0). If the first row's name
-        # cell does not look like a key marker, warn but still skip it so the
-        # user's positional contract is respected.
-        first_name = rows[0][0] if rows[0] else ""
-        if not _is_key_row(first_name):
             notes.append(
-                f"{f.filename}: row 1 first column is '{first_name}', expected "
                 f"'正確解答' / '標準答案' — treating it as the key anyway."
             )
-        for idx, row in enumerate(rows[1:], start=2):
             padded = _pad_row(row, n_cols)
             name = padded[0]
             if not name:
@@ -201,6 +200,11 @@ def _parse_teacher(files: list[ParserFile], parser_name: str) -> ParserResult:
     rows = _read_rows(text)
     if not rows:
         raise ValueError(f"{f.filename}: CSV is empty.")
     n_cols = _column_count(rows)
     if n_cols < 2:
@@ -209,13 +213,14 @@ def _parse_teacher(files: list[ParserFile], parser_name: str) -> ParserResult:
         )
     n_questions = n_cols - 1
-    key_row = _pad_row(rows[0], n_cols)
     notes: list[str] = list(notes_prefix)
     first_name = key_row[0]
     if not _is_key_row(first_name):
         notes.append(
-            f"{f.filename}: row 1 first column is '{first_name}', expected "
             f"'正確解答' / '標準答案' — treating it as the key anyway."
         )

 A single CSV file represents the full answer table:
+- Row 0: header row (column titles like `Q1, Q2, ...` — content is ignored,
+  only used to determine column count).
+- Row 1: the answer key. First cell is the name marker (e.g. `正確解答` /
   `標準答案` / `KEY`). Remaining cells are the correct letters per question.
+- Row 2..N: one row per student. First cell is the student's name. Remaining
   cells are the student's answer per question (single letter A-Z, `=` for
   correct, or blank for "did not answer").
+Column 0 is always the name column; remaining columns are positional
+Q1..QN. The header row itself is required (row 0 is always skipped), but
+its cell text is not validated.
 ## Example
 ```
+,Q1,Q2,Q3,Q4,Q5
 正確解答,B,A,A,C,D
 梁祐邦,A,=,C,C,D
 田瑜婕,=,A,C,B,D
 ```
 """
 from __future__ import annotations
     display_name = "CSV (strict format)"
     description = (
         "Direct upload of an already-tabulated answer sheet. "
+        "Row 0 = header (Q1, Q2, ... — ignored), row 1 = answer key "
+        "(first cell '正確解答'), row 2+ = students. "
         "Column 0 = name; remaining columns are positional Q1..QN. "
         "Cells: A-Z, '=' for correct, or blank."
     )
         rows = _read_rows(text)
         if not rows:
             raise ValueError(f"{f.filename}: CSV is empty.")
+        if len(rows) < 3:
             raise ValueError(
+                f"{f.filename}: expected a header row, an answer-key row, and at "
+                f"least one student row (got {len(rows)} row(s))."
             )
         n_cols = _column_count(rows)
             )
         n_questions = n_cols - 1
+        # rows[0] is the column header (Q1..QN). It's ignored beyond column count.
+        # rows[1] should be the answer key — warn if the name cell isn't a marker.
+        key_name = rows[1][0] if rows[1] else ""
+        if not _is_key_row(key_name):
             notes.append(
+                f"{f.filename}: row 2 first column is '{key_name}', expected "
                 f"'正確解答' / '標準答案' — treating it as the key anyway."
             )
+        for row in rows[2:]:
             padded = _pad_row(row, n_cols)
             name = padded[0]
             if not name:
     rows = _read_rows(text)
     if not rows:
         raise ValueError(f"{f.filename}: CSV is empty.")
+    if len(rows) < 2:
+        raise ValueError(
+            f"{f.filename}: expected a header row plus an answer-key row "
+            f"(got {len(rows)} row(s))."
+        )
     n_cols = _column_count(rows)
     if n_cols < 2:
         )
     n_questions = n_cols - 1
+    # Row 0 is the column header; row 1 is the answer key.
+    key_row = _pad_row(rows[1], n_cols)
     notes: list[str] = list(notes_prefix)
     first_name = key_row[0]
     if not _is_key_row(first_name):
         notes.append(
+            f"{f.filename}: row 2 first column is '{first_name}', expected "
             f"'正確解答' / '標準答案' — treating it as the key anyway."
         )

chatkit/backend/tests/test_csv_strict.py CHANGED Viewed

@@ -23,7 +23,7 @@ def test_can_handle_csv_only_for_answer_zones():
 @pytest.mark.unit
 async def test_student_answers_basic():
-    csv_text = "正確解答,B,A,A,C,D\n梁祐邦,A,=,C,C,D\n田瑜婕,=,A,C,B,D\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
     students = result.data["students"]
@@ -35,7 +35,7 @@ async def test_student_answers_basic():
 @pytest.mark.unit
 async def test_student_answers_blank_cell_becomes_none():
-    csv_text = "正確解答,B,A,A\n梁祐邦,A,,C\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
     answers = [a["answer"] for a in result.data["students"][0]["answers"]]
@@ -44,7 +44,7 @@ async def test_student_answers_blank_cell_becomes_none():
 @pytest.mark.unit
 async def test_student_answers_legacy_dash_becomes_equals():
-    csv_text = "正確解答,B,A,A\n梁祐邦,-,A,A\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
     assert result.data["students"][0]["answers"][0]["answer"] == "="
@@ -52,7 +52,7 @@ async def test_student_answers_legacy_dash_becomes_equals():
 @pytest.mark.unit
 async def test_student_answers_skips_empty_name_rows():
-    csv_text = "正確解答,B,A\n梁祐邦,A,A\n,B,B\n田瑜婕,B,A\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
     assert [s["name"] for s in result.data["students"]] == ["梁祐邦", "田瑜婕"]
@@ -60,11 +60,11 @@ async def test_student_answers_skips_empty_name_rows():
 @pytest.mark.unit
-async def test_student_answers_warns_when_first_name_isnt_marker():
-    csv_text = "梁祐邦,A,B\n田瑜婕,B,A\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
-    # First row is treated as the key — student count is 1
     assert len(result.data["students"]) == 1
     assert result.data["students"][0]["name"] == "田瑜婕"
     assert any("treating it as the key" in n for n in result.notes)
@@ -72,7 +72,7 @@ async def test_student_answers_warns_when_first_name_isnt_marker():
 @pytest.mark.unit
 async def test_student_answers_rejects_too_few_rows():
-    csv_text = "正確解答,A,B,C\n"
     p = get_parser("csv_strict")
     with pytest.raises(ValueError, match="at least one student"):
         await p.parse([_file(csv_text)], "student_answers")
@@ -80,7 +80,7 @@ async def test_student_answers_rejects_too_few_rows():
 @pytest.mark.unit
 async def test_student_answers_rejects_single_column():
-    csv_text = "正確解答\n梁祐邦\n"
     p = get_parser("csv_strict")
     with pytest.raises(ValueError, match="at least 2 columns"):
         await p.parse([_file(csv_text)], "student_answers")
@@ -88,7 +88,7 @@ async def test_student_answers_rejects_single_column():
 @pytest.mark.unit
 async def test_student_answers_pads_short_rows():
-    csv_text = "正確解答,B,A,A,C\n梁祐邦,A,=\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
     answers = [a["answer"] for a in result.data["students"][0]["answers"]]
@@ -96,10 +96,9 @@ async def test_student_answers_pads_short_rows():
 @pytest.mark.unit
-async def test_student_answers_arbitrary_other_column_text_ignored():
-    """Header text in row 0 cells beyond col 0 doesn't matter — col headers
-    are not validated, the file is positional."""
-    csv_text = "正確解答,B,A,A\n梁祐邦,A,=,C\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
     assert len(result.data["students"]) == 1
@@ -107,7 +106,7 @@ async def test_student_answers_arbitrary_other_column_text_ignored():
 @pytest.mark.unit
 async def test_teacher_answers_basic():
-    csv_text = "正確解答,B,A,A,C,D\n梁祐邦,A,=,C,C,D\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "teacher_answers")
     answers = result.data["answers"]
@@ -117,7 +116,7 @@ async def test_teacher_answers_basic():
 @pytest.mark.unit
 async def test_teacher_answers_rejects_equals_in_key():
-    csv_text = "正確解答,B,=,A\n梁祐邦,A,A,A\n"
     p = get_parser("csv_strict")
     with pytest.raises(ValueError, match="must be a concrete letter"):
         await p.parse([_file(csv_text)], "teacher_answers")
@@ -125,7 +124,7 @@ async def test_teacher_answers_rejects_equals_in_key():
 @pytest.mark.unit
 async def test_teacher_answers_rejects_blank_cell_in_key():
-    csv_text = "正確解答,B,,A\n梁祐邦,A,A,A\n"
     p = get_parser("csv_strict")
     with pytest.raises(ValueError, match="empty or not a letter"):
         await p.parse([_file(csv_text)], "teacher_answers")
@@ -133,7 +132,7 @@ async def test_teacher_answers_rejects_blank_cell_in_key():
 @pytest.mark.unit
 async def test_utf8_bom_accepted():
-    csv_text = "正確解答,B,A\n梁祐邦,A,A\n"
     file = ParserFile(filename="answers.csv", content=("\ufeff" + csv_text).encode("utf-8"))
     p = get_parser("csv_strict")
     result = await p.parse([file], "student_answers")
@@ -142,7 +141,7 @@ async def test_utf8_bom_accepted():
 @pytest.mark.unit
 async def test_non_utf8_raises_clear_error():
-    csv_text = "正確解答,B,A\n梁祐邦,A,A\n"
     file = ParserFile(filename="answers.csv", content=csv_text.encode("big5"))
     p = get_parser("csv_strict")
     with pytest.raises(ValueError, match="UTF-8"):
@@ -165,7 +164,7 @@ def test_pick_auto_questions_never_uses_csv():
 @pytest.mark.unit
 async def test_lowercase_marker_accepted():
-    csv_text = "key,B,A\n梁祐邦,A,A\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
     # Should NOT warn — "key" is in the marker set
@@ -175,8 +174,69 @@ async def test_lowercase_marker_accepted():
 @pytest.mark.unit
 async def test_excel_trailing_empty_columns_ignored():
-    csv_text = "正確解答,B,A,A,,\n梁祐邦,A,=,C,,\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
     answers = [a["answer"] for a in result.data["students"][0]["answers"]]
     assert answers == ["A", "=", "C"]

 @pytest.mark.unit
 async def test_student_answers_basic():
+    csv_text = ",Q1,Q2,Q3,Q4,Q5\n正確解答,B,A,A,C,D\n梁祐邦,A,=,C,C,D\n田瑜婕,=,A,C,B,D\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
     students = result.data["students"]
 @pytest.mark.unit
 async def test_student_answers_blank_cell_becomes_none():
+    csv_text = ",Q1,Q2,Q3\n正確解答,B,A,A\n梁祐邦,A,,C\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
     answers = [a["answer"] for a in result.data["students"][0]["answers"]]
 @pytest.mark.unit
 async def test_student_answers_legacy_dash_becomes_equals():
+    csv_text = ",Q1,Q2,Q3\n正確解答,B,A,A\n梁祐邦,-,A,A\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
     assert result.data["students"][0]["answers"][0]["answer"] == "="
 @pytest.mark.unit
 async def test_student_answers_skips_empty_name_rows():
+    csv_text = ",Q1,Q2\n正確解答,B,A\n梁祐邦,A,A\n,B,B\n田瑜婕,B,A\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
     assert [s["name"] for s in result.data["students"]] == ["梁祐邦", "田瑜婕"]
 @pytest.mark.unit
+async def test_student_answers_warns_when_key_row_isnt_marker():
+    csv_text = ",Q1,Q2\n梁祐邦,A,B\n田瑜婕,B,A\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
+    # Row 1 (after header) treated as the key even when name isn't a marker
     assert len(result.data["students"]) == 1
     assert result.data["students"][0]["name"] == "田瑜婕"
     assert any("treating it as the key" in n for n in result.notes)
 @pytest.mark.unit
 async def test_student_answers_rejects_too_few_rows():
+    csv_text = ",Q1,Q2,Q3\n正確解答,A,B,C\n"
     p = get_parser("csv_strict")
     with pytest.raises(ValueError, match="at least one student"):
         await p.parse([_file(csv_text)], "student_answers")
 @pytest.mark.unit
 async def test_student_answers_rejects_single_column():
+    csv_text = "header\n正確解答\n梁祐邦\n"
     p = get_parser("csv_strict")
     with pytest.raises(ValueError, match="at least 2 columns"):
         await p.parse([_file(csv_text)], "student_answers")
 @pytest.mark.unit
 async def test_student_answers_pads_short_rows():
+    csv_text = ",Q1,Q2,Q3,Q4\n正確解答,B,A,A,C\n梁祐邦,A,=\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
     answers = [a["answer"] for a in result.data["students"][0]["answers"]]
 @pytest.mark.unit
+async def test_student_answers_arbitrary_header_text_ignored():
+    """Row 0 header text is not validated — only column count matters."""
+    csv_text = "name,foo,bar,baz\n正確解答,B,A,A\n梁祐邦,A,=,C\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
     assert len(result.data["students"]) == 1
 @pytest.mark.unit
 async def test_teacher_answers_basic():
+    csv_text = ",Q1,Q2,Q3,Q4,Q5\n正確解答,B,A,A,C,D\n梁祐邦,A,=,C,C,D\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "teacher_answers")
     answers = result.data["answers"]
 @pytest.mark.unit
 async def test_teacher_answers_rejects_equals_in_key():
+    csv_text = ",Q1,Q2,Q3\n正確解答,B,=,A\n梁祐邦,A,A,A\n"
     p = get_parser("csv_strict")
     with pytest.raises(ValueError, match="must be a concrete letter"):
         await p.parse([_file(csv_text)], "teacher_answers")
 @pytest.mark.unit
 async def test_teacher_answers_rejects_blank_cell_in_key():
+    csv_text = ",Q1,Q2,Q3\n正確解答,B,,A\n梁祐邦,A,A,A\n"
     p = get_parser("csv_strict")
     with pytest.raises(ValueError, match="empty or not a letter"):
         await p.parse([_file(csv_text)], "teacher_answers")
 @pytest.mark.unit
 async def test_utf8_bom_accepted():
+    csv_text = ",Q1,Q2\n正確解答,B,A\n梁祐邦,A,A\n"
     file = ParserFile(filename="answers.csv", content=("\ufeff" + csv_text).encode("utf-8"))
     p = get_parser("csv_strict")
     result = await p.parse([file], "student_answers")
 @pytest.mark.unit
 async def test_non_utf8_raises_clear_error():
+    csv_text = ",Q1,Q2\n正確解答,B,A\n梁祐邦,A,A\n"
     file = ParserFile(filename="answers.csv", content=csv_text.encode("big5"))
     p = get_parser("csv_strict")
     with pytest.raises(ValueError, match="UTF-8"):
 @pytest.mark.unit
 async def test_lowercase_marker_accepted():
+    csv_text = ",Q1,Q2\nkey,B,A\n梁祐邦,A,A\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
     # Should NOT warn — "key" is in the marker set
 @pytest.mark.unit
 async def test_excel_trailing_empty_columns_ignored():
+    csv_text = ",Q1,Q2,Q3,,\n正確解答,B,A,A,,\n梁祐邦,A,=,C,,\n"
     p = get_parser("csv_strict")
     result = await p.parse([_file(csv_text)], "student_answers")
     answers = [a["answer"] for a in result.data["students"][0]["answers"]]
     assert answers == ["A", "=", "C"]
+# ---------------------------------------------------------------------------
+# Combined "answers" data_type via process_uploaded_files
+# ---------------------------------------------------------------------------
+class _FakeUpload:
+    """Minimal stand-in for FastAPI's UploadFile."""
+    def __init__(self, filename: str, content: bytes) -> None:
+        self.filename = filename
+        self._content = content
+    async def read(self) -> bytes:
+        return self._content
+@pytest.mark.unit
+async def test_combined_answers_persists_both_subtypes(tmp_path, monkeypatch):
+    """`data_type='answers'` runs the parser twice and stores both rows."""
+    from app import database
+    from app.file_processor import process_uploaded_files
+    # Point the DB at a fresh file
+    db_file = tmp_path / "test.db"
+    monkeypatch.setattr(database, "DATABASE_PATH", db_file)
+    monkeypatch.setattr(
+        database.get_settings.__wrapped__, "__defaults__", None, raising=False
+    )
+    # Ensure config returns no override
+    from app import config as config_module
+    cached = config_module.get_settings()
+    monkeypatch.setattr(cached, "database_path", "", raising=False)
+    await database.init_database()
+    user_id = await database.create_user("t@example.com", "h", "T")
+    session_id = await database.create_session(user_id, "S")
+    csv_text = ",Q1,Q2,Q3,Q4,Q5\n正確解答,B,A,A,C,D\n梁祐邦,A,=,C,C,D\n田瑜婕,=,A,C,B,D\n"
+    upload = _FakeUpload("answers.csv", csv_text.encode("utf-8"))
+    response = await process_uploaded_files(
+        [upload],
+        "answers",
+        session_id,
+        parser="csv_strict",
+    )
+    assert "student_answers" in response
+    assert "teacher_answers" in response
+    assert len(response["student_answers"]["students"]) == 2
+    assert len(response["teacher_answers"]["answers"]) == 5
+    rows = await database.get_parsed_data(session_id)
+    by_type = {r["data_type"]: r for r in rows}
+    assert "student_answers" in by_type
+    assert "teacher_answers" in by_type
+    assert len(by_type["student_answers"]["structured_data"]["students"]) == 2
+    assert len(by_type["teacher_answers"]["structured_data"]["answers"]) == 5

chatkit/frontend/src/components/step1/FileUploadPanel.tsx CHANGED Viewed

@@ -16,7 +16,7 @@ interface FileUploadPanelProps {
 }
 interface UploadZone {
-  type: "questions" | "student_answers" | "teacher_answers";
   label: string;
   description: string;
   placeholder: string;
@@ -32,17 +32,11 @@ const zones: UploadZone[] = [
     icon: "📝",
   },
   {
-    type: "student_answers",
-    label: "學生答案",
-    description: "上傳學生作答資料（任意格式）",
-    placeholder: "例如：35 位學生的答案卷，手寫掃描",
-    icon: "👨‍🎓",
-  },
-  {
-    type: "teacher_answers",
-    label: "標準答案",
-    description: "上傳教師答案/解答（任意格式）",
-    placeholder: "例如：教師提供的標準答案與解題步驟",
     icon: "✅",
   },
 ];
@@ -55,13 +49,11 @@ export function FileUploadPanel({
 }: FileUploadPanelProps) {
   const [files, setFiles] = useState<Record<string, File[]>>({
     questions: [],
-    student_answers: [],
-    teacher_answers: [],
   });
   const [descriptions, setDescriptions] = useState<Record<string, string>>({
     questions: "",
-    student_answers: "",
-    teacher_answers: "",
   });
   const [analyzing, setAnalyzing] = useState<Record<string, boolean>>({});
   const [errors, setErrors] = useState<Record<string, string>>({});
@@ -69,8 +61,7 @@ export function FileUploadPanel({
   const [parsers, setParsers] = useState<ParserOption[]>([]);
   const [parserByZone, setParserByZone] = useState<Record<string, string>>({
     questions: "auto",
-    student_answers: "auto",
-    teacher_answers: "auto",
   });
   const fileInputRefs = useRef<Record<string, HTMLInputElement | null>>({});
@@ -108,7 +99,17 @@ export function FileUploadPanel({
         formData.append("files", f);
       }
       const res = await apiUpload<{ data: unknown }>(`/api/sessions/${sessionId}/upload`, formData);
-      onParsedDataUpdate(type, res.data);
     } catch (err: unknown) {
       setErrors((prev) => ({
         ...prev,
@@ -135,7 +136,7 @@ export function FileUploadPanel({
       </p>
       {/* Upload zones */}
-      <div className="grid grid-cols-1 md:grid-cols-3 gap-4">
         {zones.map((zone) => {
           const zoneData = parsedData[zone.type] as Record<string, unknown> | undefined;
           const isAnalyzing = analyzing[zone.type];
@@ -248,10 +249,14 @@ export function FileUploadPanel({
                   <p className="text-xs text-[var(--color-success)] font-semibold mb-1">
                     ✓ 解析完成
                     {(() => {
-                      const meta = (zoneData as { _meta?: { parser?: string } })._meta;
-                      return meta?.parser ? (
                         <span className="ml-2 text-[var(--color-text-muted)] font-normal">
-                          (by {meta.parser})
                         </span>
                       ) : null;
                     })()}
@@ -303,21 +308,14 @@ function ZoneSummary({ type, data }: { type: string; data: Record<string, unknow
       </p>
     );
   }
-  if (type === "student_answers") {
-    const students = (data as { students?: unknown[] }).students;
-    if (!students) return null;
-    return (
-      <p className="text-xs text-[var(--color-text-muted)]">
-        共 {students.length} 位學生
-      </p>
-    );
-  }
-  if (type === "teacher_answers") {
-    const answers = (data as { answers?: unknown[] }).answers;
-    if (!answers) return null;
     return (
       <p className="text-xs text-[var(--color-text-muted)]">
-        共 {answers.length} 題答案
       </p>
     );
   }

 }
 interface UploadZone {
+  type: "questions" | "answers";
   label: string;
   description: string;
   placeholder: string;
     icon: "📝",
   },
   {
+    type: "answers",
+    label: "答案",
+    description: "上傳作答一覽表（PDF / CSV / 圖片）",
+    placeholder:
+      "例如：A 卷的考生作答一覽表（CSV 第 1 列為題號標題，第 2 列為標準答案，第 3 列起為學生）",
     icon: "✅",
   },
 ];
 }: FileUploadPanelProps) {
   const [files, setFiles] = useState<Record<string, File[]>>({
     questions: [],
+    answers: [],
   });
   const [descriptions, setDescriptions] = useState<Record<string, string>>({
     questions: "",
+    answers: "",
   });
   const [analyzing, setAnalyzing] = useState<Record<string, boolean>>({});
   const [errors, setErrors] = useState<Record<string, string>>({});
   const [parsers, setParsers] = useState<ParserOption[]>([]);
   const [parserByZone, setParserByZone] = useState<Record<string, string>>({
     questions: "auto",
+    answers: "auto",
   });
   const fileInputRefs = useRef<Record<string, HTMLInputElement | null>>({});
         formData.append("files", f);
       }
       const res = await apiUpload<{ data: unknown }>(`/api/sessions/${sessionId}/upload`, formData);
+      if (type === "answers") {
+        const data = res.data as {
+          student_answers?: unknown;
+          teacher_answers?: unknown;
+        };
+        if (data.student_answers) onParsedDataUpdate("student_answers", data.student_answers);
+        if (data.teacher_answers) onParsedDataUpdate("teacher_answers", data.teacher_answers);
+        onParsedDataUpdate("answers", res.data);
+      } else {
+        onParsedDataUpdate(type, res.data);
+      }
     } catch (err: unknown) {
       setErrors((prev) => ({
         ...prev,
       </p>
       {/* Upload zones */}
+      <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
         {zones.map((zone) => {
           const zoneData = parsedData[zone.type] as Record<string, unknown> | undefined;
           const isAnalyzing = analyzing[zone.type];
                   <p className="text-xs text-[var(--color-success)] font-semibold mb-1">
                     ✓ 解析完成
                     {(() => {
+                      const directMeta = (zoneData as { _meta?: { parser?: string } })._meta;
+                      const nestedMeta = (zoneData as {
+                        student_answers?: { _meta?: { parser?: string } };
+                      }).student_answers?._meta;
+                      const parserName = directMeta?.parser ?? nestedMeta?.parser;
+                      return parserName ? (
                         <span className="ml-2 text-[var(--color-text-muted)] font-normal">
+                          (by {parserName})
                         </span>
                       ) : null;
                     })()}
       </p>
     );
   }
+  if (type === "answers") {
+    const sa = (data as { student_answers?: { students?: unknown[] } }).student_answers;
+    const ta = (data as { teacher_answers?: { answers?: unknown[] } }).teacher_answers;
+    const studentCount = sa?.students?.length ?? 0;
+    const keyCount = ta?.answers?.length ?? 0;
     return (
       <p className="text-xs text-[var(--color-text-muted)]">
+        共 {studentCount} 位學生 / 標準答案 {keyCount} 題
       </p>
     );
   }

chatkit/run-local.sh CHANGED Viewed

@@ -9,11 +9,16 @@ cd "$SCRIPT_DIR"
 echo "🚀 Starting ClassLens locally..."
 echo ""
-# Check for .env file
-if [ ! -f "../.env" ]; then
     echo "⚠️  No .env file found. Please create one from env.example"
     exit 1
 fi
 # Function to cleanup on exit
 cleanup() {
@@ -37,7 +42,9 @@ source .venv/bin/activate
 pip install -q -e . > /dev/null 2>&1
 # Load environment variables
-export $(grep -v '^#' ../.env | xargs)
 # Start backend in background
 uvicorn app.main:app --host 127.0.0.1 --port 8000 --reload > /tmp/classlens-backend.log 2>&1 &
@@ -47,7 +54,7 @@ BACKEND_PID=$!
 sleep 2
 # Start frontend
-echo "🎨 Starting frontend on http://localhost:3000"
 cd ../frontend
 # Install frontend dependencies if needed
@@ -64,7 +71,7 @@ echo ""
 echo "✅ ClassLens is running!"
 echo ""
 echo "   Backend:  http://127.0.0.1:8000"
-echo "   Frontend: http://localhost:3000"
 echo ""
 echo "   Backend logs:  tail -f /tmp/classlens-backend.log"
 echo "   Frontend logs: tail -f /tmp/classlens-frontend.log"

 echo "🚀 Starting ClassLens locally..."
 echo ""
+# Check for .env file (project root or chatkit/) — resolve to absolute path
+if [ -f "$SCRIPT_DIR/../.env" ]; then
+    ENV_FILE="$SCRIPT_DIR/../.env"
+elif [ -f "$SCRIPT_DIR/.env" ]; then
+    ENV_FILE="$SCRIPT_DIR/.env"
+else
     echo "⚠️  No .env file found. Please create one from env.example"
     exit 1
 fi
+echo "🔑 Using env file: $ENV_FILE"
 # Function to cleanup on exit
 cleanup() {
 pip install -q -e . > /dev/null 2>&1
 # Load environment variables
+set -a
+source "$ENV_FILE"
+set +a
 # Start backend in background
 uvicorn app.main:app --host 127.0.0.1 --port 8000 --reload > /tmp/classlens-backend.log 2>&1 &
 sleep 2
 # Start frontend
+echo "🎨 Starting frontend on http://localhost:3003"
 cd ../frontend
 # Install frontend dependencies if needed
 echo "✅ ClassLens is running!"
 echo ""
 echo "   Backend:  http://127.0.0.1:8000"
+echo "   Frontend: http://localhost:3003"
 echo ""
 echo "   Backend logs:  tail -f /tmp/classlens-backend.log"
 echo "   Frontend logs: tail -f /tmp/classlens-frontend.log"