Toya0421 committed on
Commit
e954168
·
verified ·
1 Parent(s): c878901

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -67
app.py CHANGED
@@ -232,72 +232,41 @@ def get_new_passage_random_any(used_passages_set):
232
 
233
 
234
  # ======================================================
235
- # Group1: 本文のみ抽出(書き換えなし)
236
  # ======================================================
237
 
238
# Line-level patterns for metadata that is not part of the passage body
# (title/author/source headers, copyright and Project Gutenberg notices,
# bare URLs, fully bracketed or parenthesised lines, "end of ..." markers
# and "*** ... ***" banners).
_DROP_LINE_PATTERNS = (
    r"^\s*title\s*:\s*.*$",
    r"^\s*author\s*:\s*.*$",
    # Byline such as "by Jane Doe". The capital-letter check opts out of
    # IGNORECASE: with the flag active, "[A-Z]" also matches lowercase
    # letters, so body sentences like "by the time he arrived..." would be
    # dropped as if they were bylines.
    r"^\s*by\s+(?-i:[A-Z]).*$",
    r"^\s*source\s*:\s*.*$",
    r"^\s*copyright\s*.*$",
    r".*©.*",
    r".*Project\s+Gutenberg.*",
    r".*GUTENBERG.*",
    r"^\s*http[s]?://\S+.*$",
    r"^\s*www\.\S+.*$",
    r"^\s*\[.*\]\s*$",
    r"^\s*\(\s*.*\s*\)\s*$",
    r"^\s*end\s+of\s+.*$",
    r"^\s*\*{3}.*\*{3}\s*$",
)

# Compiled once at import time instead of on every call.
_DROP_LINE_RE = re.compile(
    "|".join(f"(?:{pattern})" for pattern in _DROP_LINE_PATTERNS),
    re.IGNORECASE,
)

# Upper bound on how many leading lines may be discarded as title-like.
# Blank lines count towards this budget.
_MAX_LEADING_SKIPS = 3


def _is_title_like(line: str) -> bool:
    """Return True for a blank line or a short line without sentence punctuation."""
    stripped = line.strip()
    if not stripped:
        return True
    has_sentence_punct = any(ch in stripped for ch in ".!?")
    return (
        len(stripped) <= 80
        and not has_sentence_punct
        and len(stripped.split()) <= 12
    )


def extract_main_body(text: str) -> str:
    """Return the main body of ``text`` without rewriting any of its content.

    Processing steps:

    1. Drop every line that matches one of the metadata patterns.
    2. Skip up to three leading lines that look like titles (blank lines
       are treated as title-like and count towards that limit).
    3. Right-strip each remaining line and collapse consecutive blank
       lines into a single blank line.

    Args:
        text: Raw passage text. May be empty or ``None``.

    Returns:
        The cleaned body text. ``""`` when ``text`` is falsy. When the
        filtering removes everything, the stripped original text is
        returned instead so the caller never receives an empty passage
        for non-empty input.
    """
    if not text:
        return ""

    kept = [
        line
        for line in text.splitlines()
        if not _DROP_LINE_RE.match(line.strip())
    ]

    # Discard a small number of heading-like lines at the very top.
    start = 0
    while (
        start < len(kept)
        and start < _MAX_LEADING_SKIPS
        and _is_title_like(kept[start])
    ):
        start += 1

    # Normalise whitespace: trailing spaces go, blank-line runs collapse to one.
    out = []
    previous_blank = False
    for line in kept[start:]:
        if line.strip():
            out.append(line.rstrip())
            previous_blank = False
        elif not previous_blank:
            out.append("")
            previous_blank = True

    # The final strip also removes any leading/trailing blank lines.
    result = "\n".join(out).strip()
    return result if result else text.strip()
301
 
302
  # ======================================================
303
  # Rewrite(同時実行制限付き) Group2で使用
@@ -373,6 +342,7 @@ Formatting Constraints
373
  )
374
  return resp.choices[0].message.content.strip()
375
 
 
376
  def split_pages(text, max_words=300):
377
  sentences = re.split(r'(?<=[.!?])\s+', text.strip())
378
  pages = []
@@ -397,6 +367,7 @@ def split_pages(text, max_words=300):
397
 
398
  return pages or [text]
399
 
 
400
  # ======================================================
401
  # Start(session_stateでユーザー状態管理)
402
  # ★Start後:入力はリロードまで固定(1回だけ)
@@ -487,7 +458,7 @@ def start_test(student_id, level_input, group_input, session_state):
487
  )
488
 
489
  if group == 1:
490
- processed = extract_main_body(text)
491
  measured_fre = orig_lev # ★要件:Group1はpassage_informationのflesch_scoreを記録
492
  else:
493
  processed = rewrite_level(text, level, orig_lev)
@@ -546,6 +517,7 @@ def start_test(student_id, level_input, group_input, session_state):
546
  lock_student, lock_group, lock_level, lock_start
547
  )
548
 
 
549
  # ======================================================
550
  # Next / Prev / Finish(元コードのままの構造 + state参照)
551
  # ======================================================
@@ -614,6 +586,7 @@ def next_page(pages_json, current_page, total_pages, pid, orig_lev, session_stat
614
  session_state
615
  )
616
 
 
617
  def prev_page(pages_json, current_page, total_pages, pid, orig_lev, session_state):
618
  user_id = session_state.get("user_id")
619
  level = session_state.get("level")
@@ -671,6 +644,7 @@ def prev_page(pages_json, current_page, total_pages, pid, orig_lev, session_stat
671
  session_state
672
  )
673
 
 
674
  def finish_or_retire(pages_json, current_page, pid, orig_lev, action, session_state):
675
  user_id = session_state.get("user_id")
676
  level = session_state.get("level")
@@ -709,7 +683,7 @@ def finish_or_retire(pages_json, current_page, pid, orig_lev, action, session_st
709
  )
710
 
711
  if group == 1:
712
- processed = extract_main_body(new_text)
713
  measured_fre = new_orig_lev
714
  else:
715
  processed = rewrite_level(new_text, level, new_orig_lev)
@@ -766,6 +740,7 @@ def finish_or_retire(pages_json, current_page, pid, orig_lev, action, session_st
766
  session_state
767
  )
768
 
 
769
  # ======================================================
770
  # UI(タイトル表示を追加。それ以外は変更しない)
771
  # ★追加:パスワード付きログCSVダウンロード
@@ -955,7 +930,7 @@ with gr.Blocks(css=custom_css) as demo:
955
 
956
  retire_btn = gr.Button("リタイア")
957
 
958
- # ★変更:Start後に入力をロックするため、入力コンポーネントもoutputsに追加
959
  start_btn.click(
960
  fn=start_test,
961
  inputs=[student_id_input, level_input, group_input, session_state],
@@ -1045,4 +1020,4 @@ with gr.Blocks(css=custom_css) as demo:
1045
  )
1046
 
1047
  demo.queue(max_size=64)
1048
- demo.launch()
 
232
 
233
 
234
  # ======================================================
235
+ # Group1: 本文のみ抽出(★LLMで、Group2と同等の基準)
236
  # ======================================================
237
 
238
def extract_main_body_llm(text: str) -> str:
    """Extract only the main body of a passage for Group 1, without rewriting it.

    The passage is sent to the chat-completions model with instructions to
    remove everything that is not body text (titles, headings, author and
    source information, footnotes, and so on) while leaving the wording
    untouched, so that Group 1 passages are filtered by the same standard
    as the Group 2 rewrite path.

    Args:
        text: Raw passage text. May be empty or ``None``.

    Returns:
        The extracted body text, stripped of surrounding whitespace.
        ``""`` when ``text`` is empty or whitespace-only (no request is
        sent). When the model returns no content, the stripped original
        text is returned instead so the caller never gets an empty passage
        for non-empty input.
    """
    # Nothing to extract: skip the remote call entirely.
    if not text or not text.strip():
        return ""

    prompt = f"""
Extract ONLY the main body text from the following passage.

Rules:
- Completely EXCLUDE titles, headings, chapter labels, author names, source information,
  footnotes, annotations, introductions, and any non-body content.
- Preserve the original paragraph structure of the main text.
- Insert exactly ONE blank line between paragraphs.
- Do NOT create new section breaks, chapter divisions, or headings.
- Output only the extracted main body text.
- Do not include explanations, comments, or metadata.
- Do not include [TEXT START] and [TEXT END] in the output.

[TEXT START]
{text}
[TEXT END]
""".strip()

    # NOTE(review): _rewrite_sem is presumably the shared concurrency limiter
    # used by the rewrite path; confirm against its definition.
    with _rewrite_sem:
        resp = client.chat.completions.create(
            model="google/gemini-2.5-flash",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.0,
            max_tokens=5000,
        )

    # message.content may be None (e.g. an empty or filtered completion);
    # calling .strip() on it directly would raise AttributeError.
    content = resp.choices[0].message.content
    extracted = content.strip() if content else ""
    return extracted if extracted else text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
 
 
 
270
 
271
  # ======================================================
272
  # Rewrite(同時実行制限付き) Group2で使用
 
342
  )
343
  return resp.choices[0].message.content.strip()
344
 
345
+
346
  def split_pages(text, max_words=300):
347
  sentences = re.split(r'(?<=[.!?])\s+', text.strip())
348
  pages = []
 
367
 
368
  return pages or [text]
369
 
370
+
371
  # ======================================================
372
  # Start(session_stateでユーザー状態管理)
373
  # ★Start後:入力はリロードまで固定(1回だけ)
 
458
  )
459
 
460
  if group == 1:
461
+ processed = extract_main_body_llm(text) # ★ここが変更点
462
  measured_fre = orig_lev # ★要件:Group1はpassage_informationのflesch_scoreを記録
463
  else:
464
  processed = rewrite_level(text, level, orig_lev)
 
517
  lock_student, lock_group, lock_level, lock_start
518
  )
519
 
520
+
521
  # ======================================================
522
  # Next / Prev / Finish(元コードのままの構造 + state参照)
523
  # ======================================================
 
586
  session_state
587
  )
588
 
589
+
590
  def prev_page(pages_json, current_page, total_pages, pid, orig_lev, session_state):
591
  user_id = session_state.get("user_id")
592
  level = session_state.get("level")
 
644
  session_state
645
  )
646
 
647
+
648
  def finish_or_retire(pages_json, current_page, pid, orig_lev, action, session_state):
649
  user_id = session_state.get("user_id")
650
  level = session_state.get("level")
 
683
  )
684
 
685
  if group == 1:
686
+ processed = extract_main_body_llm(new_text) # ★ここが変更点
687
  measured_fre = new_orig_lev
688
  else:
689
  processed = rewrite_level(new_text, level, new_orig_lev)
 
740
  session_state
741
  )
742
 
743
+
744
  # ======================================================
745
  # UI(タイトル表示を追加。それ以外は変更しない)
746
  # ★追加:パスワード付きログCSVダウンロード
 
930
 
931
  retire_btn = gr.Button("リタイア")
932
 
933
+ # ★Start後に入力をロックするため、入力コンポーネントもoutputsに追加
934
  start_btn.click(
935
  fn=start_test,
936
  inputs=[student_id_input, level_input, group_input, session_state],
 
1020
  )
1021
 
1022
  demo.queue(max_size=64)
1023
+ demo.launch()