Upload 3 files
Browse files- app.py +107 -0
- packages.txt +2 -0
- requirements.txt +6 -0
app.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from gtts import gTTS
|
| 3 |
+
import tempfile, os, re
|
| 4 |
+
from pykakasi import kakasi
|
| 5 |
+
from fugashi import Tagger
|
| 6 |
+
import pytesseract
|
| 7 |
+
import cv2
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
# ===== Utilities =====
# Module-level singletons shared by to_furigana_html() and explain().
tagger = Tagger() # uses unidic-lite in requirements
# NOTE(review): setMode/getConverter is pykakasi's legacy (pre-2.x) API —
# still functional at the pinned 2.2.1, but deprecated in favor of
# kakasi().convert(); confirm before upgrading pykakasi.
kk = kakasi()
kk.setMode("J","H") # Kanji -> Hiragana
kk.setMode("K","H") # Katakana -> Hiragana
kk.setMode("H","H") # Hiragana -> Hiragana
conv = kk.getConverter()  # conv.do(text) -> all-hiragana rendering of text
|
| 17 |
+
|
| 18 |
+
def normalize_text(t: str) -> str:
    """Collapse whitespace in pasted Japanese text.

    Replaces ideographic (full-width, U+3000) spaces with ASCII spaces,
    trims leading/trailing whitespace, and squeezes runs of spaces/tabs
    down to a single space. Newlines are preserved.
    """
    cleaned = t.replace("\u3000", " ").strip()
    return re.sub(r"[ \t]+", " ", cleaned)
|
| 22 |
+
|
| 23 |
+
def to_furigana_html(text: str) -> str:
    """Render *text* as HTML with token-level <ruby> furigana annotations.

    Tokenizes with fugashi; each token's reading comes from the UniDic
    feature fields (``kana``/``pron``), falling back to pykakasi when the
    dictionary has no reading. Readings are normalized to hiragana (the
    module's kakasi converter is configured K->H), so tokens that are
    already plain kana get no redundant ruby.
    """
    parts = []
    for tok in tagger(text):
        surf = tok.surface
        # BUG FIX: fugashi feature objects are namedtuples and have no
        # ``.dict`` attribute — ``tok.feature.dict.get("reading")`` raised
        # AttributeError. Read the UniDic fields via getattr instead.
        reading = getattr(tok.feature, "kana", None) or getattr(tok.feature, "pron", None)
        if reading:
            # UniDic readings are katakana; convert to hiragana for ruby.
            reading = conv.do(reading)
        else:
            reading = conv.do(surf)  # fallback: kakasi reading of the surface
        # No ruby when the reading adds nothing, i.e. the surface is already
        # kana (its hiragana form equals the reading).
        if reading and reading != conv.do(surf):
            parts.append(f"<ruby>{surf}<rt>{reading}</rt></ruby>")
        else:
            parts.append(surf)
    return "<p style='line-height:2.0;font-size:1.2em'>" + "".join(parts) + "</p>"
|
| 39 |
+
|
| 40 |
+
def explain(text: str):
    """Produce furigana HTML, a TTS audio file path, and a Markdown note.

    Returns a 3-tuple ``(furigana_html, audio_path, note_md)`` matching the
    three Gradio outputs wired to this handler. On empty input the audio
    path is None and the note asks the user (in Chinese) to enter text.
    """
    text = normalize_text(text)
    if not text:
        return "", None, "请输入日文文本。"

    # TTS (gTTS online, fast). delete=False so the file survives the
    # context manager and Gradio can stream it afterwards.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        gTTS(text, lang="ja").save(fp.name)
        audio_path = fp.name

    # Furigana HTML
    furigana_html = to_furigana_html(text)

    # Simple token table: surface, lemma, pos, reading
    rows = []
    for tok in tagger(text):
        # BUG FIX: fugashi features are namedtuples with no ``.dict``
        # attribute; ``tok.feature.dict.get("reading")`` raised
        # AttributeError. Use getattr on the UniDic ``kana`` field.
        reading = getattr(tok.feature, "kana", None) or conv.do(tok.surface)
        rows.append({
            "词形": tok.surface,
            # lemma may be present but None for unknown words — fall back
            # to the surface form either way.
            "原形": getattr(tok.feature, "lemma", None) or tok.surface,
            "词性": getattr(tok.feature, "part_of_speech", "") or "",
            "读音": reading,
        })
    # Build a Markdown table (Gradio Markdown friendly)
    header = "| 词形 | 原形 | 词性 | 读音 |\n|---|---|---|---|\n"
    body = "\n".join(f"| {r['词形']} | {r['原形']} | {r['词性']} | {r['读音']} |" for r in rows) or "| | | | |"
    table_md = header + body

    tips = "提示:朗读使用 gTTS(联网)。截图识别用 Tesseract(日语)。如遇识别不准,可先手动粘贴文本。"
    # BUG FIX: table_md was computed but never used (dead code); prepend it
    # to the Markdown note so the token table is actually displayed.
    return furigana_html, audio_path, table_md + "\n\n" + tips
|
| 68 |
+
|
| 69 |
+
# ----- OCR from screenshot -----
|
| 70 |
+
def ocr_image(img: np.ndarray):
    """Extract Japanese text from a screenshot with Tesseract.

    Converts the image to grayscale, Otsu-binarizes it to help OCR, then
    runs Tesseract with the Japanese language pack. Returns "" when no
    image was provided.
    """
    if img is None:
        return ""
    # convert to gray & binarize to help OCR
    if img.ndim == 3:
        # BUG FIX: gr.Image(type="numpy") delivers RGB (or RGBA), not BGR —
        # the original COLOR_BGR2GRAY swapped the channel weights. Drop an
        # alpha channel if present, then convert with the matching code.
        if img.shape[2] == 4:
            img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    else:
        gray = img  # already single-channel; cvtColor would reject it
    gray = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)[1]
    config = "--oem 3 --psm 6 -l jpn"
    text = pytesseract.image_to_string(gray, config=config)
    return normalize_text(text)
|
| 79 |
+
|
| 80 |
+
# ===== UI =====
# Two-tab Gradio app: (1) paste Japanese text directly, (2) OCR a pasted
# screenshot first, then feed the (editable) result to the same handler.
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown("## 🈺 Paste‑to‑Speak 日语朗读助手 (MVP)\n粘贴文本或粘贴网页截图,生成**假名注音**并**朗读**。")

    # Tab 1: text -> explain() -> (furigana HTML, audio, note). The order
    # of the outputs list must match explain()'s 3-tuple return.
    with gr.Tab("文本朗读"):
        inp = gr.Textbox(label="粘贴日文文本", lines=6, placeholder="ここに日本語を貼り付けてください。")
        btn = gr.Button("生成 朗读 + 注音")
        furigana = gr.HTML()
        audio = gr.Audio(label="朗读", autoplay=False)
        note = gr.Markdown()
        btn.click(explain, inputs=inp, outputs=[furigana, audio, note])

    # Tab 2: screenshot -> ocr_image() fills an editable textbox, which is
    # then sent through the same explain() pipeline on a second click.
    with gr.Tab("截图 → OCR → 朗读"):
        img = gr.Image(label="粘贴或拖拽网页截图(含日文)", type="numpy")
        ocr_btn = gr.Button("识别文字")
        ocr_text = gr.Textbox(label="识别结果(可编辑)", lines=6)
        go_btn = gr.Button("对识别结果 朗读 + 注音")
        furigana2 = gr.HTML()
        audio2 = gr.Audio(label="朗读", autoplay=False)
        note2 = gr.Markdown()

        ocr_btn.click(ocr_image, inputs=img, outputs=ocr_text)
        go_btn.click(explain, inputs=ocr_text, outputs=[furigana2, audio2, note2])

    gr.Markdown("— v0.1 · Gradio on Hugging Face Spaces · 作者:Sissi 内测版")

# Entry point: launch the Gradio server (Spaces also imports `demo` directly).
if __name__ == "__main__":
    demo.launch()
|
packages.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
tesseract-ocr
|
| 2 |
+
tesseract-ocr-jpn
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==4.44.0
|
| 2 |
+
gTTS==2.5.1
|
| 3 |
+
pykakasi==2.2.1
|
| 4 |
+
fugashi[unidic-lite]==1.3.2
|
| 5 |
+
pytesseract==0.3.13
|
| 6 |
+
opencv-python-headless==4.10.0.84
|