Changed OCR regions, added new regex
Browse files- app/services/ocr_service.py +38 -13
app/services/ocr_service.py
CHANGED
|
@@ -43,16 +43,28 @@ class OCRService:
|
|
| 43 |
w, h = image.size
|
| 44 |
|
| 45 |
# Crop to the top left of the card
|
| 46 |
-
top_left = image.crop((0.07 * w, 0.04 * h, 0.
|
| 47 |
|
| 48 |
# Preprocess the cropped region
|
| 49 |
top_left = self._preprocess(top_left, scale=3)
|
| 50 |
|
| 51 |
# Uses PSM 6 (block of text mode)
|
| 52 |
text = pytesseract.image_to_string(top_left, config="--psm 6 --oem 3")
|
|
|
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
|
| 58 |
# Function to return all wanted card field texts
|
|
@@ -166,17 +178,28 @@ class OCRService:
|
|
| 166 |
|
| 167 |
# Extract weight
|
| 168 |
def _extract_weight(self, region: Image.Image) -> str | None:
|
| 169 |
-
# Preprocess the
|
| 170 |
region = self._preprocess(region, scale=3)
|
| 171 |
|
| 172 |
# Uses PSM 6 (block of text mode)
|
| 173 |
text = pytesseract.image_to_string(region, config="--psm 6 --oem 3")
|
| 174 |
|
| 175 |
-
#
|
| 176 |
match = re.search(r"Weight[:\s]+([\d.]+\s*lbs?\.?)", text, re.IGNORECASE)
|
|
|
|
|
|
|
| 177 |
|
| 178 |
-
#
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
|
| 182 |
# Function to get pokemon moves
|
|
@@ -237,18 +260,20 @@ class OCRService:
|
|
| 237 |
draw = ImageDraw.Draw(vis)
|
| 238 |
|
| 239 |
regions = {
|
| 240 |
-
"Name":
|
| 241 |
-
"HP":
|
| 242 |
-
"Length/Weight":
|
| 243 |
-
"Moves":
|
| 244 |
-
"is_evolved":
|
|
|
|
| 245 |
}
|
| 246 |
colors = {
|
| 247 |
"Name": "red",
|
| 248 |
"HP": "blue",
|
| 249 |
"Length/Weight": "orange",
|
| 250 |
"Moves": "green",
|
| 251 |
-
"is_evolved": "black"
|
|
|
|
| 252 |
}
|
| 253 |
|
| 254 |
for label, box in regions.items():
|
|
|
|
| 43 |
w, h = image.size
|
| 44 |
|
| 45 |
# Crop to the top left of the card
|
| 46 |
+
top_left = image.crop((0.07 * w, 0.04 * h, 0.19 * w, 0.07 * h))
|
| 47 |
|
| 48 |
# Preprocess the cropped region
|
| 49 |
top_left = self._preprocess(top_left, scale=3)
|
| 50 |
|
| 51 |
# Uses PSM 6 (block of text mode)
|
| 52 |
text = pytesseract.image_to_string(top_left, config="--psm 6 --oem 3")
|
| 53 |
+
print("IS_EVOLVED OCR RAW:", repr(text))
|
| 54 |
|
| 55 |
+
if re.search(r'stage\s*[12]', text, re.IGNORECASE):
|
| 56 |
+
return True
|
| 57 |
+
|
| 58 |
+
# Fallback: check for "Evolves from" text which only appears on evolved cards
|
| 59 |
+
evolved_region = image.crop((0, 0.02 * h, 0.80 * w, 0.07 * h))
|
| 60 |
+
evolved_region = self._preprocess(evolved_region, scale=3)
|
| 61 |
+
evolved_text = pytesseract.image_to_string(evolved_region, config="--psm 6 --oem 3")
|
| 62 |
+
print("EVOLVES_FROM OCR RAW:", repr(evolved_text)) # remove once working
|
| 63 |
+
|
| 64 |
+
if re.search(r'evolves\s+from', evolved_text, re.IGNORECASE):
|
| 65 |
+
return True
|
| 66 |
+
|
| 67 |
+
return False
|
| 68 |
|
| 69 |
|
| 70 |
# Function to return all wanted card field texts
|
|
|
|
| 178 |
|
| 179 |
# Extract weight
|
| 180 |
def _extract_weight(self, region: Image.Image) -> str | None:
    """Read the weight value out of a cropped card region via OCR.

    The region is passed through ``self._preprocess`` (scale=3) and then
    OCR'd with Tesseract using PSM 6 (block of text mode). The resulting
    text is searched with progressively looser patterns.

    Args:
        region: Cropped image expected to contain the weight text.

    Returns:
        The weight string, or ``None`` when no pattern matches. Note the
        two output shapes: the labelled match returns the captured text
        as-is (e.g. a trailing "." is kept), while the fallbacks return
        the captured number followed by " lbs".
    """
    prepared = self._preprocess(region, scale=3)
    ocr_text = pytesseract.image_to_string(prepared, config="--psm 6 --oem 3")

    # Preferred form: an explicit "Weight" label followed by "<number> lb(s)(.)",
    # tolerating flexible spacing/punctuation after the label.
    labelled = re.search(
        r"Weight[:\s]+([\d.]+\s*lbs?\.?)", ocr_text, re.IGNORECASE
    )
    if labelled is not None:
        return labelled.group(1).strip()

    # Looser patterns, tried in order; each captures only the number.
    fallback_patterns = (
        # A number followed by "lbs" anywhere in the text.
        (r"([\d.]+)\s*lbs?\.?", re.IGNORECASE),
        # OCR sometimes reads "lbs" as "Ibs" (capital i) or "1bs";
        # this one is deliberately case-sensitive.
        (r"([\d.]+)\s*[Il1]bs?\.?", 0),
    )
    for pattern, flags in fallback_patterns:
        found = re.search(pattern, ocr_text, flags)
        if found is not None:
            return found.group(1).strip() + " lbs"

    return None
|
| 203 |
|
| 204 |
|
| 205 |
# Function to get pokemon moves
|
|
|
|
| 260 |
draw = ImageDraw.Draw(vis)
|
| 261 |
|
| 262 |
regions = {
|
| 263 |
+
"Name": (0.20 * w if evolved else 0.07 * w, 0.06 * h, 0.62 * w, 0.11 * h),
|
| 264 |
+
"HP": (0.64 * w, 0.06 * h, 0.85 * w, 0.11 * h),
|
| 265 |
+
"Length/Weight": (0.125 * w, 0.53 * h, 0.86 * w, 0.57 * h),
|
| 266 |
+
"Moves": (0.02 * w, 0.57 * h, 0.98 * w, 0.88 * h),
|
| 267 |
+
"is_evolved": (0.07 * w, 0.04 * h, 0.19 * w, 0.07 * h),
|
| 268 |
+
"evolves_from": (0, 0.02 * h, 0.80 * w, 0.07 * h)
|
| 269 |
}
|
| 270 |
colors = {
|
| 271 |
"Name": "red",
|
| 272 |
"HP": "blue",
|
| 273 |
"Length/Weight": "orange",
|
| 274 |
"Moves": "green",
|
| 275 |
+
"is_evolved": "black",
|
| 276 |
+
"evolves_from": "white"
|
| 277 |
}
|
| 278 |
|
| 279 |
for label, box in regions.items():
|