File size: 2,194 Bytes
f40ca12
 
 
 
645110b
6644358
 
f40ca12
bb7c771
f40ca12
 
6644358
 
466a55e
6644358
d5a1f1c
 
6644358
 
 
 
645110b
 
 
 
 
43f9d56
 
 
645110b
 
 
5129d6c
61f2980
 
f40ca12
5129d6c
 
 
f40ca12
5129d6c
f40ca12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb7c771
f40ca12
 
bb7c771
 
 
f40ca12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import cv2
import asyncio
from pytesseract import pytesseract, Output
import numpy as np
from urllib.parse import urlparse, quote
from requests_cache import CachedSession, DEFAULT_CACHE_NAME
from datetime import timedelta
from ..config import CRAMP, ALPHA
from pandas import concat


# Module-wide HTTP session with response caching; get_url_image() issues its
# requests through it. Responses are kept in an in-memory backend, expire
# after seven days, and both GET and POST requests are eligible for caching.
session = CachedSession(
    DEFAULT_CACHE_NAME,
    backend="memory",
    expire_after=timedelta(days=7),
    # Currently disabled options, kept for reference:
    # cache_control=True,
    # use_cache_dir=True,
    allowable_methods=["GET", "POST"],
)


def make_safe_url(url: str) -> str:
    """
    Return `url` with its path and query percent-encoded.

    Only the scheme, network location, path and query are kept; path
    parameters (``;params``) and the fragment are dropped.

    Args:
        url: Absolute URL, possibly containing spaces or non-ASCII characters.

    Returns:
        The rebuilt URL with unsafe characters in path and query quoted.
    """
    parts = urlparse(url)

    # "%" stays safe so that already-encoded sequences such as "%20" are not
    # encoded a second time ("%2520").
    safe_url = parts.scheme + "://" + parts.netloc + quote(parts.path, safe="/%")

    if parts.query:
        # "=" and "&" delimit the key/value pairs; quoting them would collapse
        # the whole query into a single opaque token.
        safe_url += "?" + quote(parts.query, safe="/=&%+")

    return safe_url


async def get_url_image(url: str):
    """
    Download `url` through the cached session and decode the body as an image.

    The blocking ``session.get`` call is run in the event loop's default
    executor so the loop is not blocked while the request is in flight.

    Args:
        url: Address of the image to fetch.

    Returns:
        A ``(img, headers)`` tuple: the result of ``cv2.imdecode`` on the
        response body (decoded with ``cv2.IMREAD_UNCHANGED``) and the
        response headers.
        NOTE(review): ``cv2.imdecode`` is expected to yield ``None`` when the
        body is not a decodable image -- callers should confirm and check.
    """
    # NOTE: the URL is requested as given; quoting via make_safe_url() is
    # currently not applied here.
    print(url)

    # Inside a coroutine the running loop is the one to use.
    loop = asyncio.get_running_loop()
    res = await loop.run_in_executor(None, session.get, url)
    print(res.cache_key, res.from_cache, res.headers, url)

    # View the response bytes as uint8 without an intermediate bytearray copy.
    arr = np.frombuffer(res.content, dtype=np.uint8)
    img = cv2.imdecode(arr, cv2.IMREAD_UNCHANGED)  # load it as it is
    return (img, res.headers)


async def mark_image(img: cv2.typing.MatLike, q: str) -> cv2.typing.MatLike:
    """
    Highlight the OCR-detected words of `img` that contain a term from `q`.

    Tesseract is run on the image (languages ``ben+eng``) in the event loop's
    default executor. Every detected word longer than one character that
    contains at least one whitespace-separated term of `q` (literal,
    case-sensitive substring match) is covered with a filled rectangle, which
    is then blended over the original with weight ``ALPHA``. The result is
    resized to a width of 1000 pixels, keeping the aspect ratio.

    Args:
        img: Image to annotate; it is not modified.
        q: Search query; split on whitespace into individual terms. An empty
            query highlights nothing.

    Returns:
        The annotated image, resized to 1000 pixels wide.
    """
    loop = asyncio.get_running_loop()
    overlay = img.copy()

    # image_to_data is blocking; positional arguments after the image are
    # lang, config, nice and output_type.
    boxes = await loop.run_in_executor(
        None,
        pytesseract.image_to_data,
        img,
        "ben+eng",
        "",
        0,
        Output.DATAFRAME,
    )

    boxes = boxes.dropna(subset=["text"])
    # The text column may not hold strings (e.g. all-numeric or empty OCR
    # output); cast so the .str accessor is always available.
    text = boxes["text"].astype(str)

    # OR together one mask per query term. regex=False makes terms such as
    # "c++" or "(" match literally instead of raising a regex error.
    matched = None
    for word in q.split():
        found = text.str.contains(word, regex=False)
        matched = found if matched is None else matched | found

    if matched is None:
        # No query terms at all: nothing to highlight.
        boxes = boxes.iloc[0:0]
    else:
        boxes = boxes[matched & (text.str.len() > 1)]

    for box in boxes.itertuples(index=False):
        left = int(box.left)
        top = int(box.top)
        width = int(box.width)
        height = int(box.height)

        # Shrink the box vertically by CRAMP pixels on each side; a thickness
        # of -1 draws a filled rectangle.
        cv2.rectangle(
            overlay,
            (left, top + CRAMP),
            (left + width, top + height - CRAMP),
            (0, 255, 255),
            -1,
        )

    # Blend the painted overlay over the untouched image.
    img_new = cv2.addWeighted(overlay, ALPHA, img, 1 - ALPHA, 0)

    # Scale to a fixed width of 1000 px, preserving the aspect ratio.
    r = 1000.0 / img_new.shape[1]
    dim = (1000, int(img_new.shape[0] * r))
    resized = cv2.resize(img_new, dim, interpolation=cv2.INTER_AREA)
    return resized