dcorcoran committed on
Commit
bc27ad8
·
1 Parent(s): 82f798a

Changed OCR process

Browse files
Files changed (1) hide show
  1. app/services/ocr_service.py +52 -43
app/services/ocr_service.py CHANGED
@@ -1,6 +1,7 @@
1
  import pytesseract
2
  import re
3
- from PIL import Image
 
4
  import sys
5
  import os
6
 
@@ -8,37 +9,39 @@ import os
8
  class OCRService:
9
 
10
  def __init__(self):
11
- # Auto-detect tesseract path
12
  if sys.platform.startswith("win"):
13
  pytesseract.pytesseract.tesseract_cmd = os.getenv("TESSERACT_PATH", "C:/Program Files/Tesseract-OCR/tesseract.exe")
14
  else:
15
- # Linux / Hugging Face Spaces
16
  pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
17
- print(f"OCR service initialized. Using Tesseract at {pytesseract.pytesseract.tesseract_cmd}")
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  def extract(self, image: Image.Image) -> dict:
20
  w, h = image.size
21
 
22
- # --------------------------------
23
- # ----- CROP REGIONS -------------
24
- # --------------------------------
25
-
26
- # Card name — top left area
27
- name_region = image.crop((0.15 * w, 0.02 * h, 0.75 * w, 0.10 * h))
28
 
29
- # HP — top right area
30
- hp_region = image.crop((0.60 * w, 0.02 * h, 0.95 * w, 0.10 * h))
31
 
32
- # Moves — lower middle section
33
- moves_region = image.crop((0.00 * w, 0.55 * h, 1.00 * w, 0.85 * h))
34
 
35
  # Full image for type detection
36
  full_text = pytesseract.image_to_string(image)
37
 
38
- # --------------------------------
39
- # ----- EXTRACT FIELDS -----------
40
- # --------------------------------
41
-
42
  return {
43
  "name": self._extract_name(name_region),
44
  "hp": self._extract_hp(hp_region),
@@ -46,29 +49,27 @@ class OCRService:
46
  "moves": self._extract_moves(moves_region),
47
  }
48
 
49
- # --------------------------------
50
- # ----- EXTRACTORS ---------------
51
- # --------------------------------
52
-
53
  def _extract_name(self, region: Image.Image) -> str | None:
54
- # Upscale region for better OCR accuracy
55
- region = region.resize(
56
- (region.width * 3, region.height * 3),
57
- Image.LANCZOS
58
- )
59
- text = pytesseract.image_to_string(region, config="--psm 7").strip()
 
 
60
  return text if text else None
61
 
62
  def _extract_hp(self, region: Image.Image) -> str | None:
63
- region = region.resize(
64
- (region.width * 3, region.height * 3),
65
- Image.LANCZOS
66
- )
67
- text = pytesseract.image_to_string(region, config="--psm 7")
68
  match = re.search(r'(\d+)\s*HP|HP\s*(\d+)', text, re.IGNORECASE)
69
  if match:
70
  return match.group(1) or match.group(2)
71
- return None
 
 
72
 
73
  def _extract_types(self, text: str) -> list[str] | None:
74
  types = [
@@ -80,25 +81,33 @@ class OCRService:
80
  return found if found else None
81
 
82
  def _extract_moves(self, region: Image.Image) -> list[dict] | None:
83
- region = region.resize(
84
- (region.width * 2, region.height * 2),
85
- Image.LANCZOS
86
- )
87
- text = pytesseract.image_to_string(region)
88
  lines = [line.strip() for line in text.splitlines() if line.strip()]
89
 
90
  moves = []
91
  i = 0
92
  while i < len(lines):
93
- # Match move name with damage e.g. "Lightning Flash 20"
94
- match = re.match(r'^([A-Z][a-zA-Z\s]+?)\s+(\d+\+?)$', lines[i])
 
 
 
 
95
  if match:
 
 
 
 
 
 
 
96
  moves.append({
97
  "name": match.group(1).strip(),
98
  "damage": match.group(2).strip(),
99
- "text": lines[i + 1] if i + 1 < len(lines) else None
100
  })
101
- i += 2
102
  else:
103
  i += 1
104
 
 
1
  import pytesseract
2
  import re
3
+ from PIL import Image, ImageFilter, ImageEnhance
4
+ import numpy as np
5
  import sys
6
  import os
7
 
 
9
  class OCRService:
10
 
11
  def __init__(self):
 
12
  if sys.platform.startswith("win"):
13
  pytesseract.pytesseract.tesseract_cmd = os.getenv("TESSERACT_PATH", "C:/Program Files/Tesseract-OCR/tesseract.exe")
14
  else:
 
15
  pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
16
+
17
+ def _preprocess(self, region: Image.Image, scale: int = 3) -> Image.Image:
18
+ """Upscale, convert to grayscale, boost contrast, and threshold to black/white for better OCR."""
19
+ region = region.resize(
20
+ (region.width * scale, region.height * scale),
21
+ Image.LANCZOS
22
+ )
23
+ region = region.convert("L") # grayscale
24
+ # Increase contrast
25
+ region = ImageEnhance.Contrast(region).enhance(2.0)
26
+ # Threshold to black/white
27
+ region = region.point(lambda x: 0 if x < 140 else 255, "1").convert("L")
28
+ return region
29
 
30
  def extract(self, image: Image.Image) -> dict:
31
  w, h = image.size
32
 
33
+ # Name — skip "Basic Pokemon" line at very top, just grab name row
34
+ name_region = image.crop((0.05 * w, 0.06 * h, 0.72 * w, 0.13 * h))
 
 
 
 
35
 
36
+ # HP — top right, large number + "HP" text
37
+ hp_region = image.crop((0.55 * w, 0.04 * h, 0.97 * w, 0.13 * h))
38
 
39
+ # Moves — middle to lower section
40
+ moves_region = image.crop((0.02 * w, 0.52 * h, 0.98 * w, 0.88 * h))
41
 
42
  # Full image for type detection
43
  full_text = pytesseract.image_to_string(image)
44
 
 
 
 
 
45
  return {
46
  "name": self._extract_name(name_region),
47
  "hp": self._extract_hp(hp_region),
 
49
  "moves": self._extract_moves(moves_region),
50
  }
51
 
 
 
 
 
52
  def _extract_name(self, region: Image.Image) -> str | None:
53
+ region = self._preprocess(region, scale=3)
54
+ text = pytesseract.image_to_string(region, config="--psm 7 --oem 3").strip()
55
+ # Clean up noise: keep only the first line that looks like a name
56
+ lines = [l.strip() for l in text.splitlines() if l.strip()]
57
+ for line in lines:
58
+ # Skip lines that are clearly not a name
59
+ if re.search(r'[A-Z][a-z]+', line) and len(line) < 30:
60
+ return line
61
  return text if text else None
62
 
63
  def _extract_hp(self, region: Image.Image) -> str | None:
64
+ region = self._preprocess(region, scale=3)
65
+ text = pytesseract.image_to_string(region, config="--psm 6 --oem 3")
66
+ # Look for a number near "HP"
 
 
67
  match = re.search(r'(\d+)\s*HP|HP\s*(\d+)', text, re.IGNORECASE)
68
  if match:
69
  return match.group(1) or match.group(2)
70
+ # Fallback: just grab any standalone number (the HP value)
71
+ match = re.search(r'\b(\d{2,3})\b', text)
72
+ return match.group(1) if match else None
73
 
74
  def _extract_types(self, text: str) -> list[str] | None:
75
  types = [
 
81
  return found if found else None
82
 
83
  def _extract_moves(self, region: Image.Image) -> list[dict] | None:
84
+ region = self._preprocess(region, scale=2)
85
+ text = pytesseract.image_to_string(region, config="--psm 6 --oem 3")
 
 
 
86
  lines = [line.strip() for line in text.splitlines() if line.strip()]
87
 
88
  moves = []
89
  i = 0
90
  while i < len(lines):
91
+ # Match: "Move Name  10" or "Move Name  10+" (name, two or more spaces, then damage); a name with no damage number is NOT matched
92
+ match = re.match(r'^([A-Z][a-zA-Z\s]{2,25}?)\s{2,}(\d+\+?)$', lines[i])
93
+ if not match:
94
+ # Try a looser match for single-word names separated by one or more spaces, e.g. "Psychic 10+"
95
+ match = re.match(r'^([A-Z][a-zA-Z]+)\s+(\d+\+?)$', lines[i])
96
+
97
  if match:
98
+ # Collect any following lines as move description until next move or end
99
+ desc_lines = []
100
+ j = i + 1
101
+ while j < len(lines) and not re.match(r'^[A-Z][a-zA-Z\s]+\s+\d+', lines[j]):
102
+ desc_lines.append(lines[j])
103
+ j += 1
104
+
105
  moves.append({
106
  "name": match.group(1).strip(),
107
  "damage": match.group(2).strip(),
108
+ "text": " ".join(desc_lines) if desc_lines else None
109
  })
110
+ i = j
111
  else:
112
  i += 1
113