dcorcoran committed on
Commit
6291913
·
1 Parent(s): 3854a46

Changed OCR regions, added new regex

Browse files
Files changed (1) hide show
  1. app/services/ocr_service.py +38 -13
app/services/ocr_service.py CHANGED
@@ -43,16 +43,28 @@ class OCRService:
43
  w, h = image.size
44
 
45
  # Crop to the top left of the card
46
- top_left = image.crop((0.07 * w, 0.04 * h, 0.18 * w, 0.07 * h))
47
 
48
  # Preprocess the cropped region
49
  top_left = self._preprocess(top_left, scale=3)
50
 
51
  # Uses PSM 6 (block of text mode)
52
  text = pytesseract.image_to_string(top_left, config="--psm 6 --oem 3")
 
53
 
54
- # Return boolean value depending on if the words STAGE1 or STAGE 2 appear
55
- return bool(re.search(r'stage\s*[12]', text, re.IGNORECASE))
 
 
 
 
 
 
 
 
 
 
 
56
 
57
 
58
  # Function to return all wanted card field texts
@@ -166,17 +178,28 @@ class OCRService:
166
 
167
  # Extract weight
168
  def _extract_weight(self, region: Image.Image) -> str | None:
169
- # Preprocess the width region
170
  region = self._preprocess(region, scale=3)
171
 
172
  # Uses PSM 6 (block of text mode)
173
  text = pytesseract.image_to_string(region, config="--psm 6 --oem 3")
174
 
175
- # Find weight match through regex patterns (between Weight and lbs)
176
  match = re.search(r"Weight[:\s]+([\d.]+\s*lbs?\.?)", text, re.IGNORECASE)
 
 
177
 
178
- # Return a match or None
179
- return match.group(1).strip() if match else None
 
 
 
 
 
 
 
 
 
180
 
181
 
182
  # Function to get pokemon moves
@@ -237,18 +260,20 @@ class OCRService:
237
  draw = ImageDraw.Draw(vis)
238
 
239
  regions = {
240
- "Name": (0.20 * w if evolved else 0.07 * w, 0.06 * h, 0.62 * w, 0.11 * h),
241
- "HP": (0.64 * w, 0.06 * h, 0.85 * w, 0.11 * h),
242
- "Length/Weight": (0.125 * w, 0.53 * h, 0.86 * w, 0.57 * h),
243
- "Moves": (0.02 * w, 0.57 * h, 0.98 * w, 0.88 * h),
244
- "is_evolved": (0.07 * w, 0.04 * h, 0.18 * w, 0.07 * h)
 
245
  }
246
  colors = {
247
  "Name": "red",
248
  "HP": "blue",
249
  "Length/Weight": "orange",
250
  "Moves": "green",
251
- "is_evolved": "black"
 
252
  }
253
 
254
  for label, box in regions.items():
 
43
  w, h = image.size
44
 
45
  # Crop to the top left of the card
46
+ top_left = image.crop((0.07 * w, 0.04 * h, 0.19 * w, 0.07 * h))
47
 
48
  # Preprocess the cropped region
49
  top_left = self._preprocess(top_left, scale=3)
50
 
51
  # Uses PSM 6 (block of text mode)
52
  text = pytesseract.image_to_string(top_left, config="--psm 6 --oem 3")
53
+ print("IS_EVOLVED OCR RAW:", repr(text))
54
 
55
+ if re.search(r'stage\s*[12]', text, re.IGNORECASE):
56
+ return True
57
+
58
+ # Fallback: check for "Evolves from" text which only appears on evolved cards
59
+ evolved_region = image.crop((0, 0.02 * h, 0.80 * w, 0.07 * h))
60
+ evolved_region = self._preprocess(evolved_region, scale=3)
61
+ evolved_text = pytesseract.image_to_string(evolved_region, config="--psm 6 --oem 3")
62
+ print("EVOLVES_FROM OCR RAW:", repr(evolved_text)) # remove once working
63
+
64
+ if re.search(r'evolves\s+from', evolved_text, re.IGNORECASE):
65
+ return True
66
+
67
+ return False
68
 
69
 
70
  # Function to return all wanted card field texts
 
178
 
179
  # Extract weight
180
  def _extract_weight(self, region: Image.Image) -> str | None:
181
+ # Preprocess the weight region
182
  region = self._preprocess(region, scale=3)
183
 
184
  # Uses PSM 6 (block of text mode)
185
  text = pytesseract.image_to_string(region, config="--psm 6 --oem 3")
186
 
187
+ # Primary: match "Weight: 76 lbs." with flexible spacing/punctuation
188
  match = re.search(r"Weight[:\s]+([\d.]+\s*lbs?\.?)", text, re.IGNORECASE)
189
+ if match:
190
+ return match.group(1).strip()
191
 
192
+ # Fallback 1: find a number followed by lbs anywhere in the line
193
+ match = re.search(r"([\d.]+)\s*lbs?\.?", text, re.IGNORECASE)
194
+ if match:
195
+ return match.group(1).strip() + " lbs"
196
+
197
+ # Fallback 2: OCR sometimes reads "lbs" as "Ibs" (capital i) or "1bs"
198
+ match = re.search(r"([\d.]+)\s*[Il1]bs?\.?", text)
199
+ if match:
200
+ return match.group(1).strip() + " lbs"
201
+
202
+ return None
203
 
204
 
205
  # Function to get pokemon moves
 
260
  draw = ImageDraw.Draw(vis)
261
 
262
  regions = {
263
+ "Name": (0.20 * w if evolved else 0.07 * w, 0.06 * h, 0.62 * w, 0.11 * h),
264
+ "HP": (0.64 * w, 0.06 * h, 0.85 * w, 0.11 * h),
265
+ "Length/Weight": (0.125 * w, 0.53 * h, 0.86 * w, 0.57 * h),
266
+ "Moves": (0.02 * w, 0.57 * h, 0.98 * w, 0.88 * h),
267
+ "is_evolved": (0.07 * w, 0.04 * h, 0.19 * w, 0.07 * h),
268
+ "evolves_from": (0, 0.02 * h, 0.80 * w, 0.07 * h)
269
  }
270
  colors = {
271
  "Name": "red",
272
  "HP": "blue",
273
  "Length/Weight": "orange",
274
  "Moves": "green",
275
+ "is_evolved": "black",
276
+ "evolves_from": "white"
277
  }
278
 
279
  for label, box in regions.items():