SissiFeng committed on
Commit
ce1fe0f
·
verified ·
1 Parent(s): 1bb018f

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +107 -0
  2. packages.txt +2 -0
  3. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gtts import gTTS
3
+ import tempfile, os, re
4
+ from pykakasi import kakasi
5
+ from fugashi import Tagger
6
+ import pytesseract
7
+ import cv2
8
+ import numpy as np
9
+
10
+ # ===== Utilities =====
11
# Shared morphological analyzer; relies on the unidic-lite dictionary that
# requirements.txt installs together with fugashi.
tagger = Tagger()

# Kana converter: kanji ("J"), katakana ("K") and hiragana ("H") input are
# all rendered as hiragana ("H").
# NOTE(review): setMode/getConverter look like the legacy pykakasi interface;
# confirm they are still supported before bumping the pinned version.
kk = kakasi()
for _source_script in ("J", "K", "H"):
    kk.setMode(_source_script, "H")
conv = kk.getConverter()
17
+
18
def normalize_text(t: str) -> str:
    """Tidy the whitespace of *t* before it is analysed or spoken.

    Ideographic (full-width) spaces become ASCII spaces, leading and
    trailing whitespace is trimmed, and every run of spaces/tabs is
    squeezed down to one space.  Line breaks inside the text are left
    untouched.
    """
    trimmed = t.replace("\u3000", " ").strip()
    return re.sub(r"[ \t]+", " ", trimmed)
22
+
23
def _katakana_to_hiragana(kana: str) -> str:
    """Map katakana letters (U+30A1..U+30F6) onto their hiragana twins.

    Characters outside that range (including the long-vowel mark) are
    returned unchanged.
    """
    return "".join(
        chr(ord(ch) - 0x60) if "\u30a1" <= ch <= "\u30f6" else ch
        for ch in kana
    )


def _ruby_reading(tok) -> str:
    """Return a hiragana reading for *tok*.

    fugashi exposes dictionary features as a namedtuple, so fields are read
    with ``getattr``; there is no ``.dict`` mapping on it.  ``kana`` is
    preferred when the dictionary provides it, then ``pron`` (which may
    contain long-vowel marks).  Missing, empty or ``"*"`` values fall back
    to converting the surface form with pykakasi.
    """
    features = tok.feature
    for field in ("kana", "pron"):
        value = getattr(features, field, None)
        if value and value != "*":
            # Dictionary readings are katakana; furigana is shown in hiragana.
            return _katakana_to_hiragana(value)
    return conv.do(tok.surface)


def to_furigana_html(text: str) -> str:
    """Render *text* as an HTML paragraph with token-level ``<ruby>`` furigana.

    Each token produced by the module-level ``tagger`` is annotated with its
    hiragana reading unless the reading is empty or identical to the surface
    form (e.g. words already written in hiragana).

    Args:
        text: Japanese text to annotate.

    Returns:
        A single ``<p>`` element as a string.  Token surfaces and readings
        are HTML-escaped because the input is user-supplied and the result
        is injected into the page.
    """
    from html import escape  # local import keeps this block self-contained

    parts = []
    for tok in tagger(text):
        surf = tok.surface
        reading = _ruby_reading(tok)
        safe_surf = escape(surf)
        if reading and reading != surf:
            parts.append(f"<ruby>{safe_surf}<rt>{escape(reading)}</rt></ruby>")
        else:
            parts.append(safe_surf)
    return "<p style='line-height:2.0;font-size:1.2em'>" + "".join(parts) + "</p>"
39
+
40
def _token_table_markdown(text: str) -> str:
    """Build a Markdown table (surface, lemma, part of speech, reading).

    Dictionary features are read with ``getattr`` because fugashi exposes
    them as a namedtuple (or a plain tuple for unknown layouts); absent,
    empty or ``"*"`` readings fall back to pykakasi on the surface form.
    """

    def cell(value) -> str:
        # An unescaped pipe would terminate the Markdown table cell early.
        return str(value or "").replace("|", "\\|")

    lines = []
    for tok in tagger(text):
        feat = tok.feature
        reading = getattr(feat, "kana", None) or getattr(feat, "pron", None)
        if not reading or reading == "*":
            reading = conv.do(tok.surface)
        lemma = getattr(feat, "lemma", None) or tok.surface
        pos = getattr(feat, "pos1", None) or ""
        lines.append(
            f"| {cell(tok.surface)} | {cell(lemma)} | {cell(pos)} | {cell(reading)} |"
        )
    header = "| 词形 | 原形 | 词性 | 读音 |\n|---|---|---|---|\n"
    return header + ("\n".join(lines) or "| | | | |")


def explain(text: str):
    """Produce furigana HTML, a spoken MP3 and a Markdown note for *text*.

    Args:
        text: Japanese text pasted by the user (``None`` is treated as empty).

    Returns:
        A 3-tuple ``(furigana_html, audio_path, note_markdown)``:
        * ``furigana_html`` - HTML from ``to_furigana_html``; ``""`` for
          empty input.
        * ``audio_path`` - path of a temporary ``.mp3`` written by gTTS, or
          ``None`` for empty input or when speech synthesis failed.
        * ``note_markdown`` - the token table, an error line if synthesis
          failed, and the usage tips; only a prompt for empty input.
    """
    text = normalize_text(text or "")
    if not text:
        return "", None, "请输入日文文本。"

    from gtts import gTTSError  # local import: only needed once TTS is attempted

    # Reserve a unique path and close the handle before gTTS writes to it;
    # the file must outlive this call so Gradio can serve it (delete=False).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        audio_path = fp.name

    tts_error = ""
    try:
        gTTS(text, lang="ja").save(audio_path)
    except (gTTSError, OSError) as err:
        # Speech is optional: still return the furigana and table, and do
        # not leave an empty temp file behind.
        tts_error = str(err)
        try:
            os.remove(audio_path)
        except OSError:
            pass
        audio_path = None

    furigana_html = to_furigana_html(text)
    table_md = _token_table_markdown(text)

    tips = "提示:朗读使用 gTTS(联网)。截图识别用 Tesseract(日语)。如遇识别不准,可先手动粘贴文本。"
    note_parts = [table_md]
    if tts_error:
        note_parts.append(f"⚠️ 朗读生成失败:{tts_error}")
    note_parts.append(tips)
    return furigana_html, audio_path, "\n\n".join(note_parts)
68
+
69
+ # ----- OCR from screenshot -----
70
def ocr_image(img: np.ndarray):
    """Run Japanese OCR on a screenshot and return the normalized text.

    Args:
        img: Image array from the Gradio ``Image`` component
            (``type="numpy"``), or ``None`` when nothing was provided.
            Accepts 2-D grayscale, or 3-D arrays with 1, 3 (RGB) or
            4 (RGBA) channels.

    Returns:
        The recognized text after ``normalize_text``; ``""`` when *img* is
        ``None``.
    """
    if img is None:
        return ""

    # Gradio hands over RGB(A) channel order, not OpenCV's native BGR, so the
    # RGB conversion codes are the ones that apply the right channel weights.
    if img.ndim == 2:
        gray = img
    elif img.shape[2] == 1:
        gray = img[:, :, 0]
    elif img.shape[2] == 4:
        gray = cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
    else:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # Otsu picks the binarization threshold automatically, which helps OCR.
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)[1]
    config = "--oem 3 --psm 6 -l jpn"
    text = pytesseract.image_to_string(gray, config=config)
    return normalize_text(text)
79
+
80
+ # ===== UI =====
81
# Two-tab layout: one tab takes pasted text, the other takes a screenshot
# that is OCR'd into an editable textbox first.  Both tabs feed `explain`.
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown(
        "## 🈺 Paste‑to‑Speak 日语朗读助手 (MVP)\n"
        "粘贴文本或粘贴网页截图,生成**假名注音**并**朗读**。"
    )

    # --- Tab 1: pasted text -> furigana + speech ---
    with gr.Tab("文本朗读"):
        inp = gr.Textbox(
            label="粘贴日文文本",
            lines=6,
            placeholder="ここに日本語を貼り付けてください。",
        )
        btn = gr.Button("生成 朗读 + 注音")
        furigana = gr.HTML()
        audio = gr.Audio(label="朗读", autoplay=False)
        note = gr.Markdown()
        btn.click(fn=explain, inputs=inp, outputs=[furigana, audio, note])

    # --- Tab 2: screenshot -> OCR -> furigana + speech ---
    with gr.Tab("截图 → OCR → 朗读"):
        img = gr.Image(
            label="粘贴或拖拽网页截图(含日文)",
            type="numpy",
        )
        ocr_btn = gr.Button("识别文字")
        ocr_text = gr.Textbox(label="识别结果(可编辑)", lines=6)
        go_btn = gr.Button("对识别结果 朗读 + 注音")
        furigana2 = gr.HTML()
        audio2 = gr.Audio(label="朗读", autoplay=False)
        note2 = gr.Markdown()

        # OCR fills the editable textbox; the second button then reads
        # whatever text is currently in it.
        ocr_btn.click(fn=ocr_image, inputs=img, outputs=ocr_text)
        go_btn.click(
            fn=explain,
            inputs=ocr_text,
            outputs=[furigana2, audio2, note2],
        )

    gr.Markdown("— v0.1 · Gradio on Hugging Face Spaces · 作者:Sissi 内测版")

# Start the app only when executed as a script.
if __name__ == "__main__":
    demo.launch()
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ tesseract-ocr
2
+ tesseract-ocr-jpn
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio==4.44.0
2
+ gTTS==2.5.1
3
+ pykakasi==2.2.1
4
+ fugashi[unidic-lite]==1.3.2
5
+ pytesseract==0.3.13
6
+ opencv-python-headless==4.10.0.84