Spaces:
Sleeping
Sleeping
File size: 2,194 Bytes
f40ca12 645110b 6644358 f40ca12 bb7c771 f40ca12 6644358 466a55e 6644358 d5a1f1c 6644358 645110b 43f9d56 645110b 5129d6c 61f2980 f40ca12 5129d6c f40ca12 5129d6c f40ca12 bb7c771 f40ca12 bb7c771 f40ca12 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
import cv2
import asyncio
from pytesseract import pytesseract, Output
import numpy as np
from urllib.parse import urlparse, quote
from requests_cache import CachedSession, DEFAULT_CACHE_NAME
from datetime import timedelta
from ..config import CRAMP, ALPHA
from pandas import concat
# Module-wide HTTP session with response caching, shared by the helpers below.
# Responses are held by the "memory" backend, expire after 7 days, and both
# GET and POST requests are eligible for caching.
session = CachedSession(
    DEFAULT_CACHE_NAME,
    backend="memory",
    expire_after=timedelta(days=7),
    # Disabled options, kept for reference:
    # cache_control=True,
    # use_cache_dir=True,
    allowable_methods=["GET", "POST"],
)
def make_safe_url(url: str) -> str:
    """
    Return ``url`` with its path and query string percent-encoded.

    The result is rebuilt as ``scheme://netloc/path[?query]``; any fragment
    or path parameters present in the input are not carried over.

    Characters that give the URL its structure are left untouched: ``/`` in
    the path, and ``=``, ``&``, ``+`` and ``/`` in the query, so key/value
    pairs survive the encoding. ``%`` is also left as-is so that input which
    is already percent-encoded is not encoded a second time, which makes the
    function safe to apply repeatedly.

    Args:
        url: Absolute URL, possibly containing spaces or non-ASCII characters.

    Returns:
        The encoded URL.
    """
    parts = urlparse(url)
    safe_url = f"{parts.scheme}://{parts.netloc}{quote(parts.path, safe='/%')}"
    if parts.query:
        # Quoting '=' and '&' would collapse the whole query into one opaque
        # token, so they must stay literal.
        safe_url += "?" + quote(parts.query, safe="/=&%+")
    return safe_url
async def get_url_image(url: str):
    """Fetch ``url`` via the shared cached session and decode the body as an image.

    The blocking HTTP GET is run in the event loop's default executor. The
    URL is used exactly as given (it is not passed through ``make_safe_url``).
    The URL and cache details of the response are printed to stdout.

    Args:
        url: Address of the image to download.

    Returns:
        A ``(image, headers)`` tuple: the result of ``cv2.imdecode`` on the
        response body, loaded unchanged, and the response headers.
    """
    print(url)
    response = await asyncio.get_running_loop().run_in_executor(
        None, session.get, url
    )
    print(response.cache_key, response.from_cache, response.headers, url)
    raw_bytes = np.frombuffer(response.content, dtype=np.uint8)
    # IMREAD_UNCHANGED: keep the file's own channel layout and bit depth.
    image = cv2.imdecode(raw_bytes, cv2.IMREAD_UNCHANGED)
    return image, response.headers
async def mark_image(img: cv2.typing.MatLike, q: str) -> cv2.typing.MatLike:
    """Highlight OCR'd words that match the query and return a 1000 px wide copy.

    Tesseract (languages ``ben+eng``) is run on ``img`` in the event loop's
    default executor. Every recognised token longer than one character that
    contains any whitespace-separated word of ``q`` as a literal,
    case-sensitive substring gets a filled rectangle drawn over it. The
    rectangle is inset vertically by ``CRAMP`` pixels at top and bottom, and
    the painted overlay is blended onto the original with weight ``ALPHA``.
    The result is scaled to a width of 1000 px, preserving aspect ratio.

    An empty or whitespace-only ``q`` highlights nothing; the image is still
    resized and returned.

    Args:
        img: Source image; it is not modified.
        q: Search query; each whitespace-separated word is matched on its own.

    Returns:
        The highlighted image, resized to 1000 px wide.
    """
    target_width = 1000

    loop = asyncio.get_running_loop()
    # OCR is blocking, so keep it off the event loop thread.
    boxes = await loop.run_in_executor(
        None,
        lambda: pytesseract.image_to_data(
            img,
            lang="ben+eng",
            config="",
            nice=0,
            output_type=Output.DATAFRAME,
        ),
    )

    boxes = boxes.dropna(subset=["text"])
    # The text column may not be string-typed (e.g. if it was parsed as
    # numbers); normalise it so the .str accessor always works.
    text = boxes["text"].astype(str)

    # One boolean mask instead of concatenating per-word frames: this copes
    # with an empty query and never lists the same box twice.
    matches_query = np.zeros(len(boxes), dtype=bool)
    for word in q.split():
        # regex=False: the query is user text, not a pattern, so characters
        # such as '(' or '+' must match literally.
        matches_query |= text.str.contains(word, regex=False).to_numpy()
    keep = matches_query & (text.str.len() > 1).to_numpy()

    # Plain Python ints; cv2 can reject numpy scalar types in point tuples.
    rects = (
        boxes.loc[keep, ["left", "top", "width", "height"]]
        .astype(int)
        .to_numpy()
        .tolist()
    )

    overlay = img.copy()
    for left, top, width, height in rects:
        cv2.rectangle(
            overlay,
            (left, top + CRAMP),
            (left + width, top + height - CRAMP),
            (0, 255, 255),
            cv2.FILLED,
        )

    blended = cv2.addWeighted(overlay, ALPHA, img, 1 - ALPHA, 0)
    scale = float(target_width) / blended.shape[1]
    # Clamp to 1 so an extremely wide, short image cannot produce a zero
    # height, which cv2.resize rejects.
    target_size = (target_width, max(1, int(blended.shape[0] * scale)))
    return cv2.resize(blended, target_size, interpolation=cv2.INTER_AREA)
|