# image-rec/app/utils/mark.py
# Author: Zul Ikram Musaddik Rayat
# Commit 466a55e: implemented in-memory response caching
import cv2
import asyncio
from pytesseract import pytesseract, Output
import numpy as np
from urllib.parse import urlparse, quote
from requests_cache import CachedSession, DEFAULT_CACHE_NAME
from datetime import timedelta
from ..config import CRAMP, ALPHA
from pandas import concat
# Shared HTTP session backed by an in-memory response cache. Responses to
# GET and POST requests are cached and expire one week after being stored.
# `cache_control` and `use_cache_dir` are intentionally left at their defaults.
session = CachedSession(
    cache_name=DEFAULT_CACHE_NAME,
    backend="memory",
    allowable_methods=["GET", "POST"],
    expire_after=timedelta(weeks=1),
)
def make_safe_url(url: str) -> str:
    """
    Returns a parsed and quoted url

    The result is rebuilt from the scheme, network location, path and query
    of `url`; URL params and the fragment are discarded. Characters that are
    not URL-safe in the path and query are percent-encoded.

    Query delimiters (`=`, `&`), `+` and existing percent escapes are left
    untouched, so a URL that is already encoded is returned unchanged
    instead of being double-encoded.
    """
    _url = urlparse(url)
    # "%" is kept safe so that existing escapes such as "%20" do not
    # become "%2520".
    safe_url = _url.scheme + "://" + _url.netloc + quote(_url.path, safe="/%")
    if _url.query:
        # "=" and "&" delimit the key/value pairs and must survive quoting,
        # otherwise the server receives a single opaque token.
        safe_url += "?" + quote(_url.query, safe="/=&%+")
    return safe_url
async def get_url_image(url: str):
    """
    Fetches an image through the cached session and decodes it with OpenCV

    The blocking `session.get` call runs in the event loop's default
    executor so the loop is not blocked while the request is in flight.
    `url` is requested exactly as given; `make_safe_url` is not applied.

    Returns a tuple `(img, headers)` holding the decoded image, loaded
    unchanged (`cv2.IMREAD_UNCHANGED`), and the response headers.

    NOTE(review): the HTTP status is not checked here, and `cv2.imdecode`
    presumably yields `None` for a payload it cannot decode - callers
    should be prepared for that; confirm against the call sites.
    """
    print(url)
    # Inside a coroutine the running loop is the one to use;
    # `get_event_loop()` is the legacy spelling.
    loop = asyncio.get_running_loop()
    res = await loop.run_in_executor(None, session.get, url)
    print(res.cache_key, res.from_cache, res.headers, url)
    # View the response bytes directly instead of copying them through a
    # temporary bytearray.
    arr = np.frombuffer(res.content, dtype=np.uint8)
    img = cv2.imdecode(arr, cv2.IMREAD_UNCHANGED)  # 'Load it as it is'
    return (img, res.headers)
def _select_matching_boxes(boxes, q: str):
    """
    Returns the OCR rows whose text matches at least one word of `q`

    A row is kept when its `text` is present, longer than one character and
    contains one of the whitespace-separated words of `q` as a literal,
    case-sensitive substring. Each row is returned at most once, and an
    empty or whitespace-only `q` selects nothing.
    """
    words = q.split()
    boxes = boxes.dropna(subset=["text"])
    # Cast to str so a column that was parsed as numbers is still searchable,
    # and use plain `in` so query characters such as "+" or "(" are matched
    # literally rather than interpreted as a regular expression.
    keep = np.array(
        [
            len(text) > 1 and any(word in text for word in words)
            for text in boxes["text"].astype(str)
        ],
        dtype=bool,
    )
    return boxes[keep]


async def mark_image(img: cv2.typing.MatLike, q: str) -> cv2.typing.MatLike:
    """
    Highlights the words of `q` found in `img` and returns a resized copy

    Tesseract OCR (languages "ben+eng") runs in the event loop's default
    executor. Every recognised word that matches the query is covered with a
    filled (0, 255, 255) rectangle, shrunk vertically by `CRAMP` pixels at
    the top and bottom, on a copy of the image. That copy is blended with
    the original using weight `ALPHA`, and the result is scaled to a width
    of 1000 pixels with the aspect ratio preserved.

    `img` itself is not modified. When `q` contains no words, or nothing
    matches, the image is returned resized but unmarked.
    """
    loop = asyncio.get_running_loop()
    overlay = img.copy()
    boxes = await loop.run_in_executor(
        None,
        pytesseract.image_to_data,
        img,
        "ben+eng",  # lang
        "",  # config
        0,  # nice
        Output.DATAFRAME,  # output_type
    )
    hits = _select_matching_boxes(boxes, q)

    for left, top, width, height in zip(
        hits["left"], hits["top"], hits["width"], hits["height"]
    ):
        # OpenCV expects plain Python ints for point coordinates.
        left, top = int(left), int(top)
        cv2.rectangle(
            overlay,
            (left, top + CRAMP),
            (left + int(width), top + int(height) - CRAMP),
            (0, 255, 255),
            -1,  # negative thickness draws a filled rectangle
        )

    img_new = cv2.addWeighted(overlay, ALPHA, img, 1 - ALPHA, 0)

    target_width = 1000
    scale = target_width / img_new.shape[1]
    # Guard against a zero height for extremely wide inputs.
    dim = (target_width, max(1, int(img_new.shape[0] * scale)))
    resized = cv2.resize(img_new, dim, interpolation=cv2.INTER_AREA)
    return resized