CassianK committed on
Commit 5c7e360 · verified · 1 Parent(s): b94ccd9

Update app.py

Files changed (1)
  1. app.py +86 -113
app.py CHANGED
@@ -1,95 +1,104 @@
-# app.py — DeepSeek-OCR (HF Space, Claude Skill ready)
-# Supports: /ocr API (REST) + Gradio UI
-# Compatible with: DeepSeek-OCR-main / DeepSeek-OCR-master / DeepSeek-OCR-hf
 
-import io, os, sys, base64, traceback
 from typing import Optional
 from PIL import Image
 import numpy as np
 import gradio as gr
-from fastapi import FastAPI, UploadFile, File, Body
-from fastapi.responses import JSONResponse
 
-# ─────────────────────────────────────────────
-# 1. Automatic path detection
-# ─────────────────────────────────────────────
 ROOT = os.path.dirname(__file__)
-CANDIDATES = [
     "DeepSeek-OCR-master",
-    "DeepSeek-OCR-hf",
     os.path.join("DeepSeek-OCR-main", "DeepSeek-OCR-master"),
     os.path.join("DeepSeek-OCR-main", "DeepSeek-OCR-hf"),
 ]
-for rel in CANDIDATES:
-    absdir = os.path.join(ROOT, rel)
-    if os.path.isdir(absdir) and absdir not in sys.path:
-        sys.path.append(absdir)
-        print(f"[path] added: {absdir}")
 
-# ─────────────────────────────────────────────
-# 2. DeepSeek-OCR adapter
-# ─────────────────────────────────────────────
-class DeepSeekOCRAdapter:
-    def __init__(self):
-        self.fn = None
-
-        # (1) deepseek_ocr.py
-        try:
-            import deepseek_ocr as dso
-            if hasattr(dso, "ocr_image"):
-                self.fn = lambda img, lang="auto": dso.ocr_image(img, lang=lang)
-                print("[Adapter] Using deepseek_ocr.ocr_image()")
-                return
-            if hasattr(dso, "DeepSeekOCR"):
-                model = dso.DeepSeekOCR()
-                self.fn = lambda img, lang="auto": model.recognize(img, lang=lang)
-                print("[Adapter] Using deepseek_ocr.DeepSeekOCR()")
-                return
-        except Exception as e:
-            print("[Adapter] deepseek_ocr import failed:", e)
 
-        # (2) run_dpsk_ocr.py (for HF)
         try:
-            import run_dpsk_ocr as runner
-            if hasattr(runner, "run"):
-                self.fn = lambda img, lang="auto": runner.run(img)
-                print("[Adapter] Using run_dpsk_ocr.run()")
                 return
-            if hasattr(runner, "infer"):
-                self.fn = lambda img, lang="auto": runner.infer(img)
-                print("[Adapter] Using run_dpsk_ocr.infer()")
-                return
-        except Exception as e:
-            print("[Adapter] run_dpsk_ocr import failed:", e)
-
-        # (3) run_dpsk_ocr_image.py (CLI style)
-        try:
-            import run_dpsk_ocr_image as runner
-            for cand in ("run", "infer", "main"):
-                if hasattr(runner, cand):
-                    fn = getattr(runner, cand)
                     def _call(img, lang="auto", _fn=fn):
-                        import tempfile
-                        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                             img.save(tmp.name)
-                            return str(_fn(tmp.name))
-                    self.fn = _call
-                    print(f"[Adapter] Using run_dpsk_ocr_image.{cand}()")
                     return
         except Exception as e:
-            print("[Adapter] run_dpsk_ocr_image import failed:", e)
 
         # fallback
-        self.fn = lambda img, lang="auto": "[DEMO] connection OK; real inference function still needs to be verified."
-        print("[Adapter] ⚠️ DEMO fallback active.")
 
-    def recognize(self, image: Image.Image, lang="auto"):
-        return self.fn(image, lang)
 
 
-# ─────────────────────────────────────────────
-# 3. Utilities
-# ─────────────────────────────────────────────
 def _to_pil(x) -> Image.Image:
     if isinstance(x, Image.Image):
         return x.convert("RGB")
@@ -100,53 +109,17 @@ def _to_pil(x) -> Image.Image:
     raise TypeError("Unsupported image type")
 
 def _b64_to_image(image_b64: str) -> Image.Image:
     return _to_pil(base64.b64decode(image_b64))
 
-def _url_to_image(url: str) -> Image.Image:
-    import requests
-    r = requests.get(url, timeout=20)
-    r.raise_for_status()
-    return _to_pil(r.content)
-
-
-# ─────────────────────────────────────────────
-# 4. FastAPI
-# ─────────────────────────────────────────────
-api = FastAPI(title="DeepSeek-OCR API")
-_engine = DeepSeekOCRAdapter()
-
-@api.post("/ocr")
-async def ocr_endpoint(
-    image_b64: Optional[str] = Body(default=None),
-    image_url: Optional[str] = Body(default=None),
-    lang: str = Body(default="auto"),
-    file: Optional[UploadFile] = File(default=None),
-):
-    try:
-        if file:
-            image = _to_pil(await file.read())
-        elif image_b64:
-            image = _b64_to_image(image_b64)
-        elif image_url:
-            image = _url_to_image(image_url)
-        else:
-            return JSONResponse(status_code=400, content={"ok": False, "error": "No image input"})
-        text = _engine.recognize(image, lang)
-        return {"ok": True, "text": text}
-    except Exception as e:
-        return JSONResponse(status_code=500, content={"ok": False, "error": str(e), "trace": traceback.format_exc()})
-
-
-# ─────────────────────────────────────────────
-# 5. Gradio UI
-# ─────────────────────────────────────────────
-def gradio_predict(img, lang):
-    if img is None:
         return "No image provided."
-    return _engine.recognize(_to_pil(img), lang)
 
-with gr.Blocks(title="DeepSeek-OCR (Claude Ready)") as demo:
-    gr.Markdown("### DeepSeek-OCR (HF Space)\nUpload an image to extract its text.")
     with gr.Row():
         img = gr.Image(type="pil", label="Input Image")
         out = gr.Textbox(label="OCR Result", lines=8)
@@ -154,5 +127,5 @@ with gr.Blocks(title="DeepSeek-OCR (Claude Ready)") as demo:
     btn = gr.Button("Run OCR")
     btn.click(gradio_predict, inputs=[img, lang], outputs=[out])
 
-app = api      # expose the FastAPI app
-demo.queue()   # adjusted for the latest Gradio version
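
The version removed here exposed a REST endpoint (POST /ocr) that accepted a base64-encoded image, an image URL, or an uploaded file, and answered with {"ok": true, "text": ...}. Below is a minimal client sketch for that old endpoint; the Space URL is hypothetical, and the exact request shape depends on how FastAPI resolves the mixed Body/File parameters:

    import base64, requests

    SPACE_URL = "https://example-deepseek-ocr.hf.space"  # hypothetical URL
    with open("sample.png", "rb") as f:
        payload = {"image_b64": base64.b64encode(f.read()).decode(), "lang": "auto"}
    resp = requests.post(f"{SPACE_URL}/ocr", json=payload, timeout=60)
    print(resp.json())  # expected: {"ok": true, "text": "..."} on success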
 
+# app.py — DeepSeek-OCR (HF Space, Gradio-only stable)
+# - Provides a Gradio UI (the Claude Skill calls it through the Gradio /run/predict API)
+# - Loads deepseek_ocr.py or run_dpsk_ocr_image.py directly by file path
 
+import io, os, sys, base64, importlib.util, tempfile, traceback
 from typing import Optional
 from PIL import Image
 import numpy as np
 import gradio as gr
 
 ROOT = os.path.dirname(__file__)
+
+# Candidate directories: ROOT/DeepSeek-OCR-master, DeepSeek-OCR-main/DeepSeek-OCR-master, DeepSeek-OCR-hf, etc.
+DIR_CANDIDATES = [
     "DeepSeek-OCR-master",
     os.path.join("DeepSeek-OCR-main", "DeepSeek-OCR-master"),
+    "DeepSeek-OCR-hf",
     os.path.join("DeepSeek-OCR-main", "DeepSeek-OCR-hf"),
 ]
 
+FILE_CANDIDATES = [
+    "deepseek_ocr.py",        # function- or class-style entry point expected
+    "run_dpsk_ocr_image.py",  # possibly a CLI-style entry point
+    "run_dpsk_ocr.py",        # HF script
+]
 
+def _find_file():
+    for d in DIR_CANDIDATES:
+        absd = os.path.join(ROOT, d)
+        if not os.path.isdir(absd):
+            continue
+        for fname in FILE_CANDIDATES:
+            path = os.path.join(absd, fname)
+            if os.path.isfile(path):
+                return path
+    return None
+
+def _load_module_from_path(path: str):
+    name = os.path.splitext(os.path.basename(path))[0]
+    spec = importlib.util.spec_from_file_location(name, path)
+    if spec is None or spec.loader is None:
+        raise ImportError(f"Cannot load module from {path}")
+    mod = importlib.util.module_from_spec(spec)
+    sys.modules[name] = mod
+    spec.loader.exec_module(mod)
+    return mod
+
+class OCRAdapter:
+    def __init__(self):
+        self.entry = None
+        self.mode = "demo"
+        self.path = _find_file()
+        print(f"[Adapter] candidate path: {self.path}")
+        if not self.path:
+            return
         try:
+            mod = _load_module_from_path(self.path)
+            # 1) Function-style entry: ocr_image(image, lang="auto")
+            if hasattr(mod, "ocr_image"):
+                self.entry = lambda img, lang="auto": mod.ocr_image(img, lang=lang)
+                self.mode = "func_ocr_image"
+                print("[Adapter] using ocr_image(image, lang)")
                 return
+            # 2) Class-style entry: DeepSeekOCR().recognize(image, lang)
+            if hasattr(mod, "DeepSeekOCR"):
+                inst = mod.DeepSeekOCR()
+                if hasattr(inst, "recognize"):
+                    self.entry = lambda img, lang="auto": inst.recognize(img, lang=lang)
+                    self.mode = "class_recognize"
+                    print("[Adapter] using DeepSeekOCR().recognize(image, lang)")
+                    return
+            # 3) Script/CLI-style: run() / infer() / main() (may require a file path)
+            for cand in ("run", "infer", "main", "predict"):
+                if hasattr(mod, cand):
+                    fn = getattr(mod, cand)
                     def _call(img, lang="auto", _fn=fn):
+                        # the entry point may expect a file path, so save the image to a temp file
+                        with tempfile.NamedTemporaryFile(suffix=".png", delete=True) as tmp:
                             img.save(tmp.name)
+                            try:
+                                return str(_fn(tmp.name))
+                            except TypeError:
+                                # retry in case the signature takes other arguments such as lang
+                                return str(_fn(tmp.name, lang=lang))
+                    self.entry = _call
+                    self.mode = f"script_{cand}"
+                    print(f"[Adapter] using {os.path.basename(self.path)}.{cand}(...) via temp file")
                     return
         except Exception as e:
+            print("[Adapter] load failed:", e)
+            print(traceback.format_exc())
 
         # fallback
+        self.entry = lambda img, lang="auto": "[DEMO] connection OK; real inference function still needs to be verified."
+        self.mode = "demo"
 
+    def recognize(self, image: Image.Image, lang="auto") -> str:
+        return self.entry(image.convert("RGB"), lang)
 
+ADAPTER = OCRAdapter()
 
 def _to_pil(x) -> Image.Image:
     if isinstance(x, Image.Image):
         return x.convert("RGB")
 
     raise TypeError("Unsupported image type")
 
 def _b64_to_image(image_b64: str) -> Image.Image:
+    import base64
     return _to_pil(base64.b64decode(image_b64))
 
+# ── Gradio UI (the Claude Skill uses the /run/predict API) ──
+def gradio_predict(image, lang):
+    if image is None:
         return "No image provided."
+    return ADAPTER.recognize(_to_pil(image), lang)
 
+with gr.Blocks(title="DeepSeek-OCR (HF Gradio)") as demo:
+    gr.Markdown("### DeepSeek-OCR (HF Space, Gradio)\nCurrent mode: **" + ADAPTER.mode + "** \nPath: " + str(ADAPTER.path))
     with gr.Row():
         img = gr.Image(type="pil", label="Input Image")
         out = gr.Textbox(label="OCR Result", lines=8)
 
     btn = gr.Button("Run OCR")
     btn.click(gradio_predict, inputs=[img, lang], outputs=[out])
 
+# Hugging Face (sdk: gradio) runs the global `demo` variable automatically.
+# demo.queue()  # enable if needed (without version-specific arguments)
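
The new version is Gradio-only, so external callers such as the Claude Skill mentioned in the header comment go through Gradio's generated API rather than a custom REST route. A minimal client sketch using the gradio_client package; the Space id is hypothetical, and the endpoint name and image-argument format vary across Gradio versions, so check client.view_api() first:

    from gradio_client import Client, handle_file

    client = Client("username/deepseek-ocr-space")  # hypothetical Space id
    client.view_api()                               # prints the available endpoint names
    result = client.predict(
        handle_file("sample.png"),                  # image input (older clients accepted a plain path)
        "auto",                                     # lang
        api_name="/gradio_predict",                 # assumed name; confirm via view_api()
    )
    print(result)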