CassianK committed on
Commit
b94ccd9
·
verified ·
1 Parent(s): 82d2909

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -110
app.py CHANGED
@@ -1,122 +1,94 @@
1
- # app.py — DeepSeek-OCR (HF Space, Claude Skill-ready)
2
- # - /ocr : REST API (POST) → file / image_b64 / image_url 지원
3
- # - Gradio UI : 브라우저에서 업로드 테스트
4
- # 폴더 구조 전제:
5
- # /app.py
6
- # /DeepSeek-OCR-master/ (repo 그대로)
7
- # /requirements.txt
8
-
9
- import io
10
- import os
11
- import sys
12
- import base64
13
- import traceback
14
- from typing import Optional
15
 
 
 
16
  from PIL import Image
17
  import numpy as np
18
-
19
  import gradio as gr
20
  from fastapi import FastAPI, UploadFile, File, Body
21
  from fastapi.responses import JSONResponse
22
 
23
  # ─────────────────────────────────────────────
24
- # 0) Repo 경둜 μΆ”κ°€
25
  # ─────────────────────────────────────────────
26
  ROOT = os.path.dirname(__file__)
27
- DEEPSEEK_ROOT = os.path.join(ROOT, "DeepSeek-OCR-master")
28
- if DEEPSEEK_ROOT not in sys.path:
29
- sys.path.append(DEEPSEEK_ROOT)
 
 
 
 
 
 
 
 
30
 
31
  # ─────────────────────────────────────────────
32
- # 1) DeepSeek-OCR μ–΄λŒ‘ν„°
33
- # - μ €μž₯μ†Œκ°€ μ œκ³΅ν•˜λŠ” μ‹€μ œ μ§„μž…μ  이름이 λ‹€λ₯Ό 수 μžˆμ–΄
34
- # μ—¬λŸ¬ νŒ¨ν„΄μ„ μ‹œλ„ν•˜λ„λ‘ κ΅¬μ„±ν–ˆμŠ΅λ‹ˆλ‹€.
35
- # - ν•„μš” μ‹œ μ•„λž˜ "TODO" 뢀뢄을 μ‹€μ œ ν•¨μˆ˜λͺ…μœΌλ‘œ λ°”κΎΈμ„Έμš”.
36
  # ─────────────────────────────────────────────
37
  class DeepSeekOCRAdapter:
38
  def __init__(self):
39
- """
40
- 가능한 엔트리 시나리오:
41
- A) deepseek_ocr.py 내부에 클래스/함수 제공
42
- - class DeepSeekOCR → .recognize(Image) 반환
43
- - def ocr_image(Image, lang="auto") 반환
44
- B) run_dpsk_ocr_image.py 내부에 함수 제공
45
- - def infer(Image) 또는 def run(Image, ...) 등
46
- """
47
- self.backend = None
48
- self.fn = None # callable(image, lang='auto') -> str
49
-
50
- # A-1) class DeepSeekOCR 시도
51
  try:
52
- import deepseek_ocr as dso # DeepSeek-OCR-master/deepseek_ocr.py
 
 
 
 
53
  if hasattr(dso, "DeepSeekOCR"):
54
- self.backend = dso.DeepSeekOCR()
55
- def _call(image: Image.Image, lang="auto"):
56
- # 클래스가 recognize(image, lang) 보유한다고 가정
57
- if hasattr(self.backend, "recognize"):
58
- return self.backend.recognize(image, lang=lang)
59
- # 혹은 run/image_to_text 등의 이름일 수 있음
60
- for cand in ("run", "infer", "image_to_text", "predict"):
61
- if hasattr(self.backend, cand):
62
- return getattr(self.backend, cand)(image)
63
- raise AttributeError("DeepSeekOCR class found but no callable method.")
64
- self.fn = _call
65
- print("[DeepSeekOCRAdapter] Using deeps eek_ocr.DeepSeekOCR")
66
  return
67
  except Exception as e:
68
- print("[DeepSeekOCRAdapter] A-1 fallback:", e)
69
 
70
- # A-2) 함수형 ocr_image 시도
71
  try:
72
- import deepseek_ocr as dso
73
- if hasattr(dso, "ocr_image"):
74
- def _call(image: Image.Image, lang="auto"):
75
- return dso.ocr_image(image, lang=lang) # TODO: 필요 시 인자명 맞추기
76
- self.fn = _call
77
- print("[DeepSeekOCRAdapter] Using deeps eek_ocr.ocr_image")
 
 
78
  return
79
  except Exception as e:
80
- print("[DeepSeekOCRAdapter] A-2 fallback:", e)
81
 
82
- # B) run_dpsk_ocr_image.py 스크립트형 시도
83
  try:
84
  import run_dpsk_ocr_image as runner
85
- for cand in ("infer", "run", "predict", "main"):
86
- if hasattr(runner, cand) and callable(getattr(runner, cand)):
87
- def _call(image: Image.Image, lang="auto", _fn=getattr(runner, cand)):
88
- # NOTE: 해당 함수가 PIL.Image가 아닌 파일경로를 요구할 수 있습니다.
89
- # 그런 경우 임시 파일로 저장해 넘깁니다.
90
- try:
91
- return _fn(image) # PIL.Image 직접 λ°›λŠ” μΌ€μ΄μŠ€
92
- except Exception:
93
- import tempfile
94
- with tempfile.NamedTemporaryFile(suffix=".png", delete=True) as tmp:
95
- image.save(tmp.name)
96
- # 가장 흔한 CLI 스타일: (path)만 or (path, config)
97
- try:
98
- return _fn(tmp.name)
99
- except Exception:
100
- # 반환이 dict/text 등 무엇이든 str로 캐스팅
101
- return str(_fn(tmp.name))
102
  self.fn = _call
103
- print(f"[DeepSeekOCRAdapter] Using run_dpsk_ocr_image.{cand}")
104
  return
105
  except Exception as e:
106
- print("[DeepSeekOCRAdapter] B fallback:", e)
107
 
108
- # 마지막 안전장치: 데모
109
- print("[DeepSeekOCRAdapter] No concrete entry found. Falling back to DEMO.")
110
- def _demo(image: Image.Image, lang="auto"):
111
- return "[DEMO] 연결 완료 — 실제 함수명을 app.py에서 한 줄만 바꿔주세요."
112
- self.fn = _demo
113
 
114
- def recognize(self, image: Image.Image, lang: str = "auto") -> str:
115
- return self.fn(image, lang=lang)
116
 
117
 
118
  # ─────────────────────────────────────────────
119
- # 2) μœ ν‹Έ
120
  # ─────────────────────────────────────────────
121
  def _to_pil(x) -> Image.Image:
122
  if isinstance(x, Image.Image):
@@ -128,8 +100,7 @@ def _to_pil(x) -> Image.Image:
128
  raise TypeError("Unsupported image type")
129
 
130
  def _b64_to_image(image_b64: str) -> Image.Image:
131
- raw = base64.b64decode(image_b64)
132
- return _to_pil(raw)
133
 
134
  def _url_to_image(url: str) -> Image.Image:
135
  import requests
@@ -139,55 +110,49 @@ def _url_to_image(url: str) -> Image.Image:
139
 
140
 
141
  # ─────────────────────────────────────────────
142
- # 3) FastAPI (REST)
143
  # ─────────────────────────────────────────────
144
  api = FastAPI(title="DeepSeek-OCR API")
145
  _engine = DeepSeekOCRAdapter()
146
 
147
  @api.post("/ocr")
148
- async def ocr(
149
  image_b64: Optional[str] = Body(default=None),
150
  image_url: Optional[str] = Body(default=None),
151
  lang: str = Body(default="auto"),
152
  file: Optional[UploadFile] = File(default=None),
153
  ):
154
  try:
155
- if file is not None:
156
  image = _to_pil(await file.read())
157
  elif image_b64:
158
  image = _b64_to_image(image_b64)
159
  elif image_url:
160
  image = _url_to_image(image_url)
161
  else:
162
- return JSONResponse(status_code=400, content={
163
- "ok": False, "error": "Provide one of: file | image_b64 | image_url"
164
- })
165
- text = _engine.recognize(image, lang=lang)
166
  return {"ok": True, "text": text}
167
  except Exception as e:
168
- return JSONResponse(status_code=500, content={
169
- "ok": False, "error": str(e), "trace": traceback.format_exc()
170
- })
171
 
172
  # ─────────────────────────────────────────────
173
- # 4) Gradio UI (ν…ŒμŠ€νŠΈ)
174
  # ─────────────────────────────────────────────
175
- def _predict(image, lang):
176
- if image is None:
177
- return "No image."
178
- pil = _to_pil(image)
179
- return _engine.recognize(pil, lang=lang)
180
 
181
- with gr.Blocks(title="DeepSeek-OCR (Claude-ready)") as demo:
182
  gr.Markdown("### DeepSeek-OCR (HF Space)\n이미지를 업로드하면 텍스트를 추출합니다.")
183
  with gr.Row():
184
- img = gr.Image(type="pil", label="Input image")
185
  out = gr.Textbox(label="OCR Result", lines=8)
186
  lang = gr.Radio(["auto","en","ko","ja","zh"], value="auto", label="Language")
187
  btn = gr.Button("Run OCR")
188
- btn.click(_predict, inputs=[img, lang], outputs=[out])
189
 
190
- # HF Spaces는 보통 Gradio 앱을 기본 엔트리로 띄우지만,
191
- # FastAPI 엔드포인트도 함께 노출하려면 아래처럼 alias를 둡니다.
192
- app = api
193
- demo.queue(concurrency_count=1)
 
1
+ # app.py — DeepSeek-OCR (HF Space, Claude Skill ready)
2
+ # 지원: /ocr API (REST) + Gradio UI
3
+ # 호환: DeepSeek-OCR-main / DeepSeek-OCR-master / DeepSeek-OCR-hf
 
 
 
 
 
 
 
 
 
 
 
4
 
5
+ import io, os, sys, base64, traceback
6
+ from typing import Optional
7
  from PIL import Image
8
  import numpy as np
 
9
  import gradio as gr
10
  from fastapi import FastAPI, UploadFile, File, Body
11
  from fastapi.responses import JSONResponse
12
 
13
  # ─────────────────────────────────────────────
14
+ # 1. 경둜 μžλ™ 인식
15
  # ─────────────────────────────────────────────
16
  ROOT = os.path.dirname(__file__)
17
+ CANDIDATES = [
18
+ "DeepSeek-OCR-master",
19
+ "DeepSeek-OCR-hf",
20
+ os.path.join("DeepSeek-OCR-main", "DeepSeek-OCR-master"),
21
+ os.path.join("DeepSeek-OCR-main", "DeepSeek-OCR-hf"),
22
+ ]
23
+ for rel in CANDIDATES:
24
+ absdir = os.path.join(ROOT, rel)
25
+ if os.path.isdir(absdir) and absdir not in sys.path:
26
+ sys.path.append(absdir)
27
+ print(f"[path] added: {absdir}")
28
 
29
  # ─────────────────────────────────────────────
30
+ # 2. DeepSeek-OCR μ–΄λŒ‘ν„°
 
 
 
31
  # ─────────────────────────────────────────────
32
  class DeepSeekOCRAdapter:
33
  def __init__(self):
34
+ self.fn = None
35
+
36
+ # (1) deepseek_ocr.py
 
 
 
 
 
 
 
 
 
37
  try:
38
+ import deepseek_ocr as dso
39
+ if hasattr(dso, "ocr_image"):
40
+ self.fn = lambda img, lang="auto": dso.ocr_image(img, lang=lang)
41
+ print("[Adapter] Using deepseek_ocr.ocr_image()")
42
+ return
43
  if hasattr(dso, "DeepSeekOCR"):
44
+ model = dso.DeepSeekOCR()
45
+ self.fn = lambda img, lang="auto": model.recognize(img, lang=lang)
46
+ print("[Adapter] Using deepseek_ocr.DeepSeekOCR()")
 
 
 
 
 
 
 
 
 
47
  return
48
  except Exception as e:
49
+ print("[Adapter] deepseek_ocr import failed:", e)
50
 
51
+ # (2) run_dpsk_ocr.py (HF용)
52
  try:
53
+ import run_dpsk_ocr as runner
54
+ if hasattr(runner, "run"):
55
+ self.fn = lambda img, lang="auto": runner.run(img)
56
+ print("[Adapter] Using run_dpsk_ocr.run()")
57
+ return
58
+ if hasattr(runner, "infer"):
59
+ self.fn = lambda img, lang="auto": runner.infer(img)
60
+ print("[Adapter] Using run_dpsk_ocr.infer()")
61
  return
62
  except Exception as e:
63
+ print("[Adapter] run_dpsk_ocr import failed:", e)
64
 
65
+ # (3) run_dpsk_ocr_image.py (CLI 스타일)
66
  try:
67
  import run_dpsk_ocr_image as runner
68
+ for cand in ("run", "infer", "main"):
69
+ if hasattr(runner, cand):
70
+ fn = getattr(runner, cand)
71
+ def _call(img, lang="auto", _fn=fn):
72
+ import tempfile
73
+ with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
74
+ img.save(tmp.name)
75
+ return str(_fn(tmp.name))
 
 
 
 
 
 
 
 
 
76
  self.fn = _call
77
+ print(f"[Adapter] Using run_dpsk_ocr_image.{cand}()")
78
  return
79
  except Exception as e:
80
+ print("[Adapter] run_dpsk_ocr_image import failed:", e)
81
 
82
+ # fallback
83
+ self.fn = lambda img, lang="auto": "[DEMO] 연결 성공 — 실제 추론 함수 확인 필요."
84
+ print("[Adapter] ⚠️ DEMO fallback active.")
 
 
85
 
86
+ def recognize(self, image: Image.Image, lang="auto"):
87
+ return self.fn(image, lang)
88
 
89
 
90
  # ─────────────────────────────────────────────
91
+ # 3. μœ ν‹Έ
92
  # ─────────────────────────────────────────────
93
  def _to_pil(x) -> Image.Image:
94
  if isinstance(x, Image.Image):
 
100
  raise TypeError("Unsupported image type")
101
 
102
  def _b64_to_image(image_b64: str) -> Image.Image:
103
+ return _to_pil(base64.b64decode(image_b64))
 
104
 
105
  def _url_to_image(url: str) -> Image.Image:
106
  import requests
 
110
 
111
 
112
  # ─────────────────────────────────────────────
113
+ # 4. FastAPI
114
  # ─────────────────────────────────────────────
115
  api = FastAPI(title="DeepSeek-OCR API")
116
  _engine = DeepSeekOCRAdapter()
117
 
118
  @api.post("/ocr")
119
+ async def ocr_endpoint(
120
  image_b64: Optional[str] = Body(default=None),
121
  image_url: Optional[str] = Body(default=None),
122
  lang: str = Body(default="auto"),
123
  file: Optional[UploadFile] = File(default=None),
124
  ):
125
  try:
126
+ if file:
127
  image = _to_pil(await file.read())
128
  elif image_b64:
129
  image = _b64_to_image(image_b64)
130
  elif image_url:
131
  image = _url_to_image(image_url)
132
  else:
133
+ return JSONResponse(status_code=400, content={"ok": False, "error": "No image input"})
134
+ text = _engine.recognize(image, lang)
 
 
135
  return {"ok": True, "text": text}
136
  except Exception as e:
137
+ return JSONResponse(status_code=500, content={"ok": False, "error": str(e), "trace": traceback.format_exc()})
138
+
 
139
 
140
  # ─────────────────────────────────────────────
141
+ # 5. Gradio UI
142
  # ─────────────────────────────────────────────
143
+ def gradio_predict(img, lang):
144
+ if img is None:
145
+ return "No image provided."
146
+ return _engine.recognize(_to_pil(img), lang)
 
147
 
148
+ with gr.Blocks(title="DeepSeek-OCR (Claude Ready)") as demo:
149
  gr.Markdown("### DeepSeek-OCR (HF Space)\n이미지를 업로드하면 텍스트를 추출합니다.")
150
  with gr.Row():
151
+ img = gr.Image(type="pil", label="Input Image")
152
  out = gr.Textbox(label="OCR Result", lines=8)
153
  lang = gr.Radio(["auto","en","ko","ja","zh"], value="auto", label="Language")
154
  btn = gr.Button("Run OCR")
155
+ btn.click(gradio_predict, inputs=[img, lang], outputs=[out])
156
 
157
+ app = api # FastAPI 엔진 노출
158
+ demo.queue() # 최신 gradio 버전에 맞게 수정