juanpablomesa
committed on
Commit
•
42ed865
1
Parent(s):
af56925
Added easyocr for text extraction
Browse files
- handler.py +8 -0
- requirements.txt +3 -1
handler.py
CHANGED
@@ -18,6 +18,7 @@ from huggingface_hub import logging
|
|
18 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
19 |
|
20 |
import timeit
|
|
|
21 |
|
22 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
23 |
|
@@ -47,6 +48,7 @@ class EndpointHandler:
|
|
47 |
|
48 |
logging.set_verbosity_debug()
|
49 |
self.logger = logging.get_logger(__name__)
|
|
|
50 |
|
51 |
def download_image(self, url: str) -> bytes:
|
52 |
"""
|
@@ -183,6 +185,12 @@ class EndpointHandler:
|
|
183 |
"source_type": "images",
|
184 |
**image_metadata,
|
185 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
processed_metadata.append(complete_image_metadata)
|
187 |
|
188 |
except Exception as e:
|
|
|
18 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
19 |
|
20 |
import timeit
|
21 |
+
import easyocr
|
22 |
|
23 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
24 |
|
|
|
48 |
|
49 |
logging.set_verbosity_debug()
|
50 |
self.logger = logging.get_logger(__name__)
|
51 |
+
self.reader = easyocr.Reader(["de", "en"])
|
52 |
|
53 |
def download_image(self, url: str) -> bytes:
|
54 |
"""
|
|
|
185 |
"source_type": "images",
|
186 |
**image_metadata,
|
187 |
}
|
188 |
+
# Extract text from image using easyocr
|
189 |
+
extracted_text = self.reader.readtext(
|
190 |
+
np.array(image), detail=0
|
191 |
+
)
|
192 |
+
complete_image_metadata["extracted_text"] = extracted_text
|
193 |
+
|
194 |
processed_metadata.append(complete_image_metadata)
|
195 |
|
196 |
except Exception as e:
|
requirements.txt
CHANGED
@@ -21,4 +21,6 @@ tokenizers==0.13.3
|
|
21 |
tqdm==4.66.1
|
22 |
transformers==4.27.2
|
23 |
typing_extensions==4.8.0
|
24 |
-
urllib3==2.0.7
|
|
|
|
|
|
21 |
tqdm==4.66.1
|
22 |
transformers==4.27.2
|
23 |
typing_extensions==4.8.0
|
24 |
+
urllib3==2.0.7
|
25 |
+
easyocr==1.7.1
|
26 |
+
opencv_python_headless==4.8.1
|