Spaces:
Sleeping
Sleeping
import cv2 | |
import asyncio | |
from pytesseract import pytesseract, Output | |
import numpy as np | |
from urllib.parse import urlparse, quote | |
from requests_cache import CachedSession, DEFAULT_CACHE_NAME | |
from datetime import timedelta | |
from ..config import CRAMP, ALPHA | |
from pandas import concat | |
# How long a cached response stays valid before it is fetched again.
_CACHE_TTL = timedelta(days=7)

# Module-wide HTTP session: responses to GET and POST requests are cached
# in memory and reused until they expire.
session = CachedSession(
    cache_name=DEFAULT_CACHE_NAME,
    backend="memory",
    expire_after=_CACHE_TTL,
    # cache_control=True,
    # use_cache_dir=True,
    allowable_methods=["GET", "POST"],
)
def make_safe_url(url: str) -> str:
    """
    Return ``url`` with its path and query percent-encoded.

    Only the scheme, network location, path and query are kept; URL
    parameters and the fragment are dropped.

    The query delimiters ``=``, ``&`` and ``+`` are left as-is so the query
    string keeps its key/value structure, and ``%`` is left as-is so that
    input which is already percent-encoded is not encoded a second time
    (calling this function on its own output returns the same string).
    """
    parts = urlparse(url)
    safe_url = parts.scheme + "://" + parts.netloc + quote(parts.path, safe="/%")
    if parts.query:
        # The default ``safe="/"`` would turn "a=1&b=2" into "a%3D1%26b%3D2",
        # collapsing the whole query into one meaningless token.
        safe_url += "?" + quote(parts.query, safe="=&+%")
    return safe_url
async def get_url_image(url: str):
    """
    Fetch ``url`` through the shared cached ``session`` and decode the
    response body as an image.

    The blocking ``session.get`` call is run in the event loop's default
    executor so the coroutine does not block the loop while downloading.

    Returns a tuple ``(img, headers)`` where ``img`` is the result of
    ``cv2.imdecode`` with the channels left as stored in the file, and
    ``headers`` are the HTTP response headers.
    """
    # NOTE(review): make_safe_url is deliberately not applied here, matching
    # the original code where the call was commented out.
    # url = make_safe_url(url)
    print(url)
    # Inside a coroutine the running loop is the one we want.
    loop = asyncio.get_running_loop()
    res = await loop.run_in_executor(None, session.get, url)
    print(res.cache_key, res.from_cache, res.headers, url)
    # View the response bytes as uint8 without the intermediate bytearray copy.
    arr = np.frombuffer(res.content, dtype=np.uint8)
    # NOTE(review): the HTTP status is not checked and the decode result is
    # returned as-is -- presumably callers validate ``img``; confirm.
    img = cv2.imdecode(arr, cv2.IMREAD_UNCHANGED)  # 'Load it as it is'
    return (img, res.headers)
async def mark_image(img: cv2.typing.MatLike, q: str) -> cv2.typing.MatLike:
    """
    Highlight the OCR'd words of ``img`` that contain any word of ``q`` and
    return the result scaled to a width of 1000 pixels.

    The image is run through Tesseract (languages ``ben+eng``) in the event
    loop's default executor. Every recognised token longer than one
    character that contains at least one whitespace-separated word of ``q``
    (plain, case-sensitive substring match) gets a filled rectangle drawn on
    a copy of the image; the copy is then blended over the original with
    weight ``ALPHA``. ``CRAMP`` pixels are trimmed from the top and bottom of
    each rectangle.

    An empty or whitespace-only ``q`` highlights nothing and simply returns
    the resized image.
    """
    loop = asyncio.get_running_loop()
    overlay = img.copy()

    # run_in_executor forwards positional arguments only, so the
    # image_to_data parameters are given in signature order.
    boxes = await loop.run_in_executor(
        None,
        pytesseract.image_to_data,
        img,
        "ben+eng",  # lang
        "",  # config
        0,  # nice
        Output.DATAFRAME,  # output_type
    )
    boxes = boxes.dropna(subset=["text"])

    # Cast to str: when every token looks numeric the column is not a string
    # column and the ``.str`` accessor would fail.
    texts = boxes["text"].astype(str)
    words = q.split()
    # One boolean per row, so a token matching several query words is drawn
    # only once, and query words are matched literally rather than as regular
    # expressions. With no query words the mask is all False.
    mask = np.fromiter(
        (len(text) > 1 and any(word in text for word in words) for text in texts),
        dtype=bool,
        count=len(texts),
    )
    boxes = boxes.loc[mask]

    # Plain Python ints keep cv2.rectangle independent of numpy scalar types.
    coords = boxes[["left", "top", "width", "height"]].astype(int)
    for left, top, width, height in coords.itertuples(index=False, name=None):
        cv2.rectangle(
            overlay,
            (left, top + CRAMP),
            (left + width, top + height - CRAMP),
            (0, 255, 255),
            cv2.FILLED,
        )

    img_new = cv2.addWeighted(overlay, ALPHA, img, 1 - ALPHA, 0)

    # Scale to a fixed width of 1000 px, preserving the aspect ratio; the
    # height is clamped to at least 1 px so cv2.resize never gets a zero size.
    r = 1000.0 / img_new.shape[1]
    dim = (1000, max(1, int(img_new.shape[0] * r)))
    resized = cv2.resize(img_new, dim, interpolation=cv2.INTER_AREA)
    return resized