# app.py - DeepSeek-OCR (HF Space, Gradio only, recursive finder)
import io, os, sys, base64, importlib.util, tempfile, traceback
from typing import Optional, List
from PIL import Image
import numpy as np
import gradio as gr
ROOT = os.path.dirname(__file__)
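# Candidate entry-point filenames to look for anywhere under the Space checkout;
# the first module that exposes a usable entry point wins (see OCRAdapter below).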
TARGET_FILENAMES = [
    "deepseek_ocr.py",        # function/class style
    "run_dpsk_ocr_image.py",  # CLI style
    "run_dpsk_ocr.py",        # HF-specific script
]
def find_candidates(root: str) -> List[str]:
    """Recursively walk the whole project and collect every path matching a target filename."""
    hits = []
    for dirpath, dirnames, filenames in os.walk(root):
        for fn in filenames:
            if fn in TARGET_FILENAMES:
                hits.append(os.path.join(dirpath, fn))
    return sorted(hits)
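# Import a module directly from a file path. The module is registered in
# sys.modules under its bare filename before exec_module() runs, so code inside
# it that looks itself up by name keeps working.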
def load_module_from_path(path: str):
    name = os.path.splitext(os.path.basename(path))[0]
    spec = importlib.util.spec_from_file_location(name, path)
    if not spec or not spec.loader:
        raise ImportError(f"Cannot load module from {path}")
    mod = importlib.util.module_from_spec(spec)
    sys.modules[name] = mod
    spec.loader.exec_module(mod)
    return mod
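# OCRAdapter probes each candidate module found above and binds self.entry to the
# first callable it recognizes, in order of preference:
#   1) a module-level ocr_image(image, lang=...) function,
#   2) a DeepSeekOCR class whose instances expose recognize(image, lang=...),
#   3) a script-style run/infer/main/predict callable fed a temporary PNG path.
# If nothing loads, it stays in "demo" mode and returns a placeholder string.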
class OCRAdapter:
    def __init__(self):
        self.mode = "demo"
        self.path = None
        self.debug = []
        self.entry = lambda img, lang="auto": "[DEMO] DeepSeek code is not connected yet."
        hits = find_candidates(ROOT)
        self.debug.append(f"ROOT={ROOT}")
        self.debug.append("FOUND=" + ("; ".join(hits) if hits else "(none)"))
        for path in hits:
            try:
                mod = load_module_from_path(path)
                # 1) Function style: ocr_image(image, lang="auto")
                if hasattr(mod, "ocr_image"):
                    self.entry = lambda img, lang="auto", _m=mod: _m.ocr_image(img, lang=lang)
                    self.mode, self.path = "func_ocr_image", path
                    self.debug.append(f"USE {path} :: ocr_image")
                    return
                # 2) Class style: DeepSeekOCR().recognize(image, lang)
                if hasattr(mod, "DeepSeekOCR"):
                    inst = mod.DeepSeekOCR()
                    if hasattr(inst, "recognize"):
                        self.entry = lambda img, lang="auto", _i=inst: _i.recognize(img, lang=lang)
                        self.mode, self.path = "class_recognize", path
                        self.debug.append(f"USE {path} :: DeepSeekOCR.recognize")
                        return
                # 3) Script style: run / infer / main (may expect a file path)
                for cand in ("run", "infer", "main", "predict"):
                    if hasattr(mod, cand):
                        fn = getattr(mod, cand)
                        def _call(img, lang="auto", _fn=fn):
                            with tempfile.NamedTemporaryFile(suffix=".png", delete=True) as tmp:
                                img.save(tmp.name)
                                try:
                                    return str(_fn(tmp.name))
                                except TypeError:
                                    return str(_fn(tmp.name, lang=lang))
                        self.entry = _call
                        self.mode, self.path = f"script_{cand}", path
                        self.debug.append(f"USE {path} :: {cand}(path)")
                        return
                self.debug.append(f"NO ENTRY in {path}")
            except Exception as e:
                self.debug.append(f"LOAD FAIL {path} :: {e}")
    def recognize(self, image: Image.Image, lang="auto") -> str:
        return self.entry(image.convert("RGB"), lang)
ADAPTER = OCRAdapter()
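# Built once at import time, so candidate discovery runs a single time per Space
# startup; mode, path, and the debug trail are surfaced in the UI below.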
def _to_pil(x) -> Image.Image:
    if isinstance(x, Image.Image): return x.convert("RGB")
    if isinstance(x, (bytes, bytearray)): return Image.open(io.BytesIO(x)).convert("RGB")
    if isinstance(x, np.ndarray): return Image.fromarray(x).convert("RGB")
    raise TypeError("Unsupported image type")
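# Convenience helper for base64-encoded payloads (e.g. programmatic callers);
# not wired into the Gradio UI below.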
def _b64_to_image(image_b64: str) -> Image.Image:
    return _to_pil(base64.b64decode(image_b64))
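# Gradio callback: normalize whatever the Image component delivers to RGB PIL,
# run the adapter, and return the full traceback on failure so it shows in the UI.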
def gradio_predict(image, lang):
    if image is None: return "No image provided."
    try:
        return ADAPTER.recognize(_to_pil(image), lang)
    except Exception as e:
        return f"[ERROR] {e}\n" + traceback.format_exc()
with gr.Blocks(title="DeepSeek-OCR (HF Space, Gradio)") as demo:
    gr.Markdown(
        "### DeepSeek-OCR (HF Space, Gradio)\n"
        f"**Current mode:** `{ADAPTER.mode}`  \n"
        f"**Path:** `{ADAPTER.path}`  \n"
        "**Candidates found:**\n```\n" + "\n".join(ADAPTER.debug) + "\n```"
    )
    with gr.Row():
        img = gr.Image(type="pil", label="Input Image")
        out = gr.Textbox(label="OCR Result", lines=10)
    lang = gr.Radio(["auto", "en", "ko", "ja", "zh"], value="auto", label="Language")
    btn = gr.Button("Run OCR")
    btn.click(gradio_predict, inputs=[img, lang], outputs=[out])
demo.launch()