phyloforfun committed
Commit 5093ce6 · 1 Parent(s): 70768ef
req

requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
vouchervision/OCR_google_cloud_vision (DESKTOP-548UDCR's conflicted copy 2024-06-15).py
DELETED
@@ -1,850 +0,0 @@
import os, io, sys, inspect, statistics, json, cv2
from statistics import mean
# from google.cloud import vision, storage
from google.cloud import vision
from google.cloud import vision_v1p3beta1 as vision_beta
from PIL import Image, ImageDraw, ImageFont
import colorsys
from tqdm import tqdm
from google.oauth2 import service_account

### LLaVA should only be installed if the user will actually use it.
### It requires the most recent pytorch/Python and can mess with older systems

'''
@misc{li2021trocr,
    title={TrOCR: Transformer-based Optical Character Recognition with Pre-trained Models},
    author={Minghao Li and Tengchao Lv and Lei Cui and Yijuan Lu and Dinei Florencio and Cha Zhang and Zhoujun Li and Furu Wei},
    year={2021},
    eprint={2109.10282},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
@inproceedings{baek2019character,
    title={Character Region Awareness for Text Detection},
    author={Baek, Youngmin and Lee, Bado and Han, Dongyoon and Yun, Sangdoo and Lee, Hwalsuk},
    booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
    pages={9365--9374},
    year={2019}
}
'''

class OCREngine:

    BBOX_COLOR = "black"

    def __init__(self, logger, json_report, dir_home, is_hf, path, cfg, trOCR_model_version, trOCR_model, trOCR_processor, device):
        self.is_hf = is_hf
        self.logger = logger

        self.json_report = json_report

        self.path = path
        self.cfg = cfg
        self.do_use_trOCR = self.cfg['leafmachine']['project']['do_use_trOCR']
        self.OCR_option = self.cfg['leafmachine']['project']['OCR_option']
        self.double_OCR = self.cfg['leafmachine']['project']['double_OCR']
        self.dir_home = dir_home

        # Initialize TrOCR components
        self.trOCR_model_version = trOCR_model_version
        self.trOCR_processor = trOCR_processor
        self.trOCR_model = trOCR_model
        self.device = device

        self.hand_cleaned_text = None
        self.hand_organized_text = None
        self.hand_bounds = None
        self.hand_bounds_word = None
        self.hand_bounds_flat = None
        self.hand_text_to_box_mapping = None
        self.hand_height = None
        self.hand_confidences = None
        self.hand_characters = None

        self.normal_cleaned_text = None
        self.normal_organized_text = None
        self.normal_bounds = None
        self.normal_bounds_word = None
        self.normal_text_to_box_mapping = None
        self.normal_bounds_flat = None
        self.normal_height = None
        self.normal_confidences = None
        self.normal_characters = None

        self.trOCR_texts = None
        self.trOCR_text_to_box_mapping = None
        self.trOCR_bounds_flat = None
        self.trOCR_height = None
        self.trOCR_confidences = None
        self.trOCR_characters = None
        self.set_client()
        self.init_craft()

        self.multimodal_prompt = """I need you to transcribe all of the text in this image.
        Place the transcribed text into a JSON dictionary with this form {"Transcription_Printed_Text": "text","Transcription_Handwritten_Text": "text"}"""
        self.init_llava()

    def set_client(self):
        if self.is_hf:
            self.client_beta = vision_beta.ImageAnnotatorClient(credentials=self.get_google_credentials())
            self.client = vision.ImageAnnotatorClient(credentials=self.get_google_credentials())
        else:
            self.client_beta = vision_beta.ImageAnnotatorClient(credentials=self.get_google_credentials())
            self.client = vision.ImageAnnotatorClient(credentials=self.get_google_credentials())

    def get_google_credentials(self):
        creds_json_str = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
        credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
        return credentials

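    # Note: get_google_credentials() reads GOOGLE_APPLICATION_CREDENTIALS as the raw
    # service-account JSON string (it is passed through json.loads), not as a path to
    # a key file. A minimal, hypothetical setup sketch:
    #
    #     import os
    #     with open('service_account.json') as f:  # hypothetical local key file
    #         os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = f.read()
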
    def init_craft(self):
        if 'CRAFT' in self.OCR_option:
            from craft_text_detector import load_craftnet_model, load_refinenet_model

            try:
                self.refine_net = load_refinenet_model(cuda=True)
                self.use_cuda = True
            except:
                self.refine_net = load_refinenet_model(cuda=False)
                self.use_cuda = False

            if self.use_cuda:
                self.craft_net = load_craftnet_model(weight_path=os.path.join(self.dir_home,'vouchervision','craft','craft_mlt_25k.pth'), cuda=True)
            else:
                self.craft_net = load_craftnet_model(weight_path=os.path.join(self.dir_home,'vouchervision','craft','craft_mlt_25k.pth'), cuda=False)

    def init_llava(self):
        if 'LLaVA' in self.OCR_option:
            from vouchervision.OCR_llava import OCRllava

            self.model_path = "liuhaotian/" + self.cfg['leafmachine']['project']['OCR_option_llava']
            self.model_quant = self.cfg['leafmachine']['project']['OCR_option_llava_bit']

            if self.json_report:
                self.json_report.set_text(text_main=f'Loading LLaVA model: {self.model_path} Quantization: {self.model_quant}')

            if self.model_quant == '4bit':
                use_4bit = True
            elif self.model_quant == 'full':
                use_4bit = False
            else:
                self.logger.info(f"Provided model quantization invalid. Using 4bit.")
                use_4bit = True

            self.Llava = OCRllava(self.logger, model_path=self.model_path, load_in_4bit=use_4bit, load_in_8bit=False)

    def init_gemini_vision(self):
        pass

    def init_gpt4_vision(self):
        pass

    def detect_text_craft(self):
        from craft_text_detector import read_image, get_prediction

        # Perform prediction using CRAFT
        image = read_image(self.path)

        link_threshold = 0.85
        text_threshold = 0.4
        low_text = 0.4

        if self.use_cuda:
            self.prediction_result = get_prediction(
                image=image,
                craft_net=self.craft_net,
                refine_net=self.refine_net,
                text_threshold=text_threshold,
                link_threshold=link_threshold,
                low_text=low_text,
                cuda=True,
                long_size=1280
            )
        else:
            self.prediction_result = get_prediction(
                image=image,
                craft_net=self.craft_net,
                refine_net=self.refine_net,
                text_threshold=text_threshold,
                link_threshold=link_threshold,
                low_text=low_text,
                cuda=False,
                long_size=1280
            )

        # Initialize metadata structures
        bounds = []
        bounds_word = []  # CRAFT gives bounds for text regions, not individual words
        text_to_box_mapping = []
        bounds_flat = []
        height_flat = []
        confidences = []  # CRAFT does not provide confidences per character, so this might be uniformly set or estimated
        characters = []  # Simulating as CRAFT doesn't provide character-level details
        organized_text = ""

        total_b = len(self.prediction_result["boxes"])
        i = 0
        # Process each detected text region
        for box in self.prediction_result["boxes"]:
            i += 1
            if self.json_report:
                self.json_report.set_text(text_main=f'Locating text using CRAFT --- {i}/{total_b}')

            vertices = [{"x": int(vertex[0]), "y": int(vertex[1])} for vertex in box]

            # Simulate a mapping for the whole detected region as a word
            text_to_box_mapping.append({
                "vertices": vertices,
                "text": "detected_text"  # Placeholder, as CRAFT does not provide the text content directly
            })

            # Assuming each box is a word for the sake of this example
            bounds_word.append({"vertices": vertices})

            # For simplicity, we're not dividing text regions into characters as CRAFT doesn't provide this
            # Instead, we create a single large 'character' per detected region
            bounds.append({"vertices": vertices})

            # Simulate flat bounds and height for each detected region
            x_positions = [vertex["x"] for vertex in vertices]
            y_positions = [vertex["y"] for vertex in vertices]
            min_x, max_x = min(x_positions), max(x_positions)
            min_y, max_y = min(y_positions), max(y_positions)
            avg_height = max_y - min_y
            height_flat.append(avg_height)

            # Assuming uniform confidence for all detected regions
            confidences.append(1.0)  # Placeholder confidence

            # Adding dummy character for each box
            characters.append("X")  # Placeholder character

            # Organize text as a single string (assuming each box is a word)
            # organized_text += "detected_text "  # Placeholder text

        # Update class attributes with processed data
        self.normal_bounds = bounds
        self.normal_bounds_word = bounds_word
        self.normal_text_to_box_mapping = text_to_box_mapping
        self.normal_bounds_flat = bounds_flat  # This would be similar to bounds if not processing characters individually
        self.normal_height = height_flat
        self.normal_confidences = confidences
        self.normal_characters = characters
        self.normal_organized_text = organized_text.strip()

    def detect_text_with_trOCR_using_google_bboxes(self, do_use_trOCR, logger):
        CONFIDENCES = 0.80
        MAX_NEW_TOKENS = 50

        self.OCR_JSON_to_file = {}

        ocr_parts = ''
        if not do_use_trOCR:
            if 'normal' in self.OCR_option:
                self.OCR_JSON_to_file['OCR_printed'] = self.normal_organized_text
                # logger.info(f"Google_OCR_Standard:\n{self.normal_organized_text}")
                # ocr_parts = ocr_parts + f"Google_OCR_Standard:\n{self.normal_organized_text}"
                ocr_parts = self.normal_organized_text

            if 'hand' in self.OCR_option:
                self.OCR_JSON_to_file['OCR_handwritten'] = self.hand_organized_text
                # logger.info(f"Google_OCR_Handwriting:\n{self.hand_organized_text}")
                # ocr_parts = ocr_parts + f"Google_OCR_Handwriting:\n{self.hand_organized_text}"
                ocr_parts = self.hand_organized_text

            # if self.OCR_option in ['both',]:
            #     logger.info(f"Google_OCR_Standard:\n{self.normal_organized_text}\n\nGoogle_OCR_Handwriting:\n{self.hand_organized_text}")
            #     return f"Google_OCR_Standard:\n{self.normal_organized_text}\n\nGoogle_OCR_Handwriting:\n{self.hand_organized_text}"
            return ocr_parts
        else:
            logger.info(f'Supplementing with trOCR')

            self.trOCR_texts = []
            original_image = Image.open(self.path).convert("RGB")

            if 'normal' in self.OCR_option or 'CRAFT' in self.OCR_option:
                available_bounds = self.normal_bounds_word
            elif 'hand' in self.OCR_option:
                available_bounds = self.hand_bounds_word
            # elif self.OCR_option in ['both',]:
            #     available_bounds = self.hand_bounds_word
            else:
                raise Exception("No word-level bounding boxes are available for trOCR")

            text_to_box_mapping = []
            characters = []
            height = []
            confidences = []
            total_b = len(available_bounds)
            i = 0
            for bound in tqdm(available_bounds, desc="Processing words using Google Vision bboxes"):
                i += 1
                if self.json_report:
                    self.json_report.set_text(text_main=f'Working on trOCR :construction: {i}/{total_b}')

                vertices = bound["vertices"]

                left = min([v["x"] for v in vertices])
                top = min([v["y"] for v in vertices])
                right = max([v["x"] for v in vertices])
                bottom = max([v["y"] for v in vertices])

                # Crop image based on Google's bounding box
                cropped_image = original_image.crop((left, top, right, bottom))
                pixel_values = self.trOCR_processor(cropped_image, return_tensors="pt").pixel_values

                # Move pixel values to the appropriate device
                pixel_values = pixel_values.to(self.device)

                generated_ids = self.trOCR_model.generate(pixel_values, max_new_tokens=MAX_NEW_TOKENS)
                extracted_text = self.trOCR_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
                self.trOCR_texts.append(extracted_text)

                # For plotting
                word_length = max(vertex.get('x') for vertex in vertices) - min(vertex.get('x') for vertex in vertices)
                num_symbols = len(extracted_text)

                Yw = max(vertex.get('y') for vertex in vertices)
                Yo = Yw - min(vertex.get('y') for vertex in vertices)
                X = word_length / num_symbols if num_symbols > 0 else 0
                H = int(X + (Yo * 0.1))
                height.append(H)

                map_dict = {
                    "vertices": vertices,
                    "text": extracted_text  # Use the text extracted by trOCR
                }
                text_to_box_mapping.append(map_dict)

                characters.append(extracted_text)
                confidences.append(CONFIDENCES)

            median_height = statistics.median(height) if height else 0
            median_heights = [median_height * 1.5] * len(characters)

            self.trOCR_texts = ' '.join(self.trOCR_texts)

            self.trOCR_text_to_box_mapping = text_to_box_mapping
            self.trOCR_bounds_flat = available_bounds
            self.trOCR_height = median_heights
            self.trOCR_confidences = confidences
            self.trOCR_characters = characters

            if 'normal' in self.OCR_option:
                self.OCR_JSON_to_file['OCR_printed'] = self.normal_organized_text
                self.OCR_JSON_to_file['OCR_trOCR'] = self.trOCR_texts
                # logger.info(f"Google_OCR_Standard:\n{self.normal_organized_text}\n\ntrOCR:\n{self.trOCR_texts}")
                # ocr_parts = ocr_parts + f"\nGoogle_OCR_Standard:\n{self.normal_organized_text}\n\ntrOCR:\n{self.trOCR_texts}"
                ocr_parts = self.trOCR_texts
            if 'hand' in self.OCR_option:
                self.OCR_JSON_to_file['OCR_handwritten'] = self.hand_organized_text
                self.OCR_JSON_to_file['OCR_trOCR'] = self.trOCR_texts
                # logger.info(f"Google_OCR_Handwriting:\n{self.hand_organized_text}\n\ntrOCR:\n{self.trOCR_texts}")
                # ocr_parts = ocr_parts + f"\nGoogle_OCR_Handwriting:\n{self.hand_organized_text}\n\ntrOCR:\n{self.trOCR_texts}"
                ocr_parts = self.trOCR_texts
            # if self.OCR_option in ['both',]:
            #     self.OCR_JSON_to_file['OCR_printed'] = self.normal_organized_text
            #     self.OCR_JSON_to_file['OCR_handwritten'] = self.hand_organized_text
            #     self.OCR_JSON_to_file['OCR_trOCR'] = self.trOCR_texts
            #     logger.info(f"Google_OCR_Standard:\n{self.normal_organized_text}\n\nGoogle_OCR_Handwriting:\n{self.hand_organized_text}\n\ntrOCR:\n{self.trOCR_texts}")
            #     ocr_parts = ocr_parts + f"\nGoogle_OCR_Standard:\n{self.normal_organized_text}\n\nGoogle_OCR_Handwriting:\n{self.hand_organized_text}\n\ntrOCR:\n{self.trOCR_texts}"
            if 'CRAFT' in self.OCR_option:
                # self.OCR_JSON_to_file['OCR_printed'] = self.normal_organized_text
                self.OCR_JSON_to_file['OCR_CRAFT_trOCR'] = self.trOCR_texts
                # logger.info(f"CRAFT_trOCR:\n{self.trOCR_texts}")
                # ocr_parts = ocr_parts + f"\nCRAFT_trOCR:\n{self.trOCR_texts}"
                ocr_parts = self.trOCR_texts
            return ocr_parts

    @staticmethod
    def confidence_to_color(confidence):
        hue = (confidence - 0.5) * 120 / 0.5
        r, g, b = colorsys.hls_to_rgb(hue/360, 0.5, 1)
        return (int(r*255), int(g*255), int(b*255))

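    # confidence_to_color maps OCR confidence onto an HLS hue ramp: 0.5 -> hue 0 (red)
    # and 1.0 -> hue 120 (green). A quick sanity check, assuming this class is importable:
    #
    #     OCREngine.confidence_to_color(0.5)  # ~(255, 0, 0)
    #     OCREngine.confidence_to_color(1.0)  # ~(0, 255, 0)
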
    def render_text_on_black_image(self, option):
        bounds_flat = getattr(self, f'{option}_bounds_flat', [])
        heights = getattr(self, f'{option}_height', [])
        confidences = getattr(self, f'{option}_confidences', [])
        characters = getattr(self, f'{option}_characters', [])

        original_image = Image.open(self.path)
        width, height = original_image.size
        black_image = Image.new("RGB", (width, height), "black")
        draw = ImageDraw.Draw(black_image)

        for bound, confidence, char_height, character in zip(bounds_flat, confidences, heights, characters):
            font_size = int(char_height)
            try:
                font = ImageFont.truetype("arial.ttf", font_size)
            except:
                font = ImageFont.load_default().font_variant(size=font_size)
            if option == 'trOCR':
                color = (0, 170, 255)
            else:
                color = OCREngine.confidence_to_color(confidence)
            position = (bound["vertices"][0]["x"], bound["vertices"][0]["y"] - char_height)
            draw.text(position, character, fill=color, font=font)

        return black_image

    def merge_images(self, image1, image2):
        width1, height1 = image1.size
        width2, height2 = image2.size
        merged_image = Image.new("RGB", (width1 + width2, max([height1, height2])))
        merged_image.paste(image1, (0, 0))
        merged_image.paste(image2, (width1, 0))
        return merged_image

    def draw_boxes(self, option):
        bounds = getattr(self, f'{option}_bounds', [])
        bounds_word = getattr(self, f'{option}_bounds_word', [])
        confidences = getattr(self, f'{option}_confidences', [])

        draw = ImageDraw.Draw(self.image)
        width, height = self.image.size
        if min([width, height]) > 4000:
            line_width_thick = int((width + height) / 2 * 0.0025)  # Adjust line width for character level
            line_width_thin = 1
        else:
            line_width_thick = int((width + height) / 2 * 0.005)  # Adjust line width for character level
            line_width_thin = 1  # int((width + height) / 2 * 0.001)

        for bound in bounds_word:
            draw.polygon(
                [
                    bound["vertices"][0]["x"], bound["vertices"][0]["y"],
                    bound["vertices"][1]["x"], bound["vertices"][1]["y"],
                    bound["vertices"][2]["x"], bound["vertices"][2]["y"],
                    bound["vertices"][3]["x"], bound["vertices"][3]["y"],
                ],
                outline=OCREngine.BBOX_COLOR,
                width=line_width_thin
            )

        # Draw a line segment at the bottom of each handwritten character
        for bound, confidence in zip(bounds, confidences):
            color = OCREngine.confidence_to_color(confidence)
            # Use the bottom two vertices of the bounding box for the line
            bottom_left = (bound["vertices"][3]["x"], bound["vertices"][3]["y"] + line_width_thick)
            bottom_right = (bound["vertices"][2]["x"], bound["vertices"][2]["y"] + line_width_thick)
            draw.line([bottom_left, bottom_right], fill=color, width=line_width_thick)

        return self.image

    def detect_text(self):

        with io.open(self.path, 'rb') as image_file:
            content = image_file.read()
        image = vision.Image(content=content)
        response = self.client.document_text_detection(image=image)
        texts = response.text_annotations

        if response.error.message:
            raise Exception(
                '{}\nFor more info on error messages, check: '
                'https://cloud.google.com/apis/design/errors'.format(
                    response.error.message))

        bounds = []
        bounds_word = []
        text_to_box_mapping = []
        bounds_flat = []
        height_flat = []
        confidences = []
        characters = []
        organized_text = ""
        paragraph_count = 0

        for text in texts[1:]:
            vertices = [{"x": vertex.x, "y": vertex.y} for vertex in text.bounding_poly.vertices]
            map_dict = {
                "vertices": vertices,
                "text": text.description
            }
            text_to_box_mapping.append(map_dict)

        for page in response.full_text_annotation.pages:
            for block in page.blocks:
                # paragraph_count += 1
                # organized_text += f'OCR_paragraph_{paragraph_count}:\n'  # Add paragraph label
                for paragraph in block.paragraphs:

                    avg_H_list = []
                    for word in paragraph.words:
                        Yw = max(vertex.y for vertex in word.bounding_box.vertices)
                        # Calculate the width of the word and divide by the number of symbols
                        word_length = max(vertex.x for vertex in word.bounding_box.vertices) - min(vertex.x for vertex in word.bounding_box.vertices)
                        num_symbols = len(word.symbols)
                        if num_symbols <= 3:
                            H = int(Yw - min(vertex.y for vertex in word.bounding_box.vertices))
                        else:
                            Yo = Yw - min(vertex.y for vertex in word.bounding_box.vertices)
                            X = word_length / num_symbols if num_symbols > 0 else 0
                            H = int(X + (Yo * 0.1))
                        avg_H_list.append(H)
                    avg_H = int(mean(avg_H_list))

                    words_in_para = []
                    for word in paragraph.words:
                        # Get word-level bounding box
                        bound_word_dict = {
                            "vertices": [
                                {"x": vertex.x, "y": vertex.y} for vertex in word.bounding_box.vertices
                            ]
                        }
                        bounds_word.append(bound_word_dict)

                        Y = max(vertex.y for vertex in word.bounding_box.vertices)
                        word_x_start = min(vertex.x for vertex in word.bounding_box.vertices)
                        word_x_end = max(vertex.x for vertex in word.bounding_box.vertices)
                        num_symbols = len(word.symbols)
                        symbol_width = (word_x_end - word_x_start) / num_symbols if num_symbols > 0 else 0

                        current_x_position = word_x_start

                        characters_ind = []
                        for symbol in word.symbols:
                            bound_dict = {
                                "vertices": [
                                    {"x": vertex.x, "y": vertex.y} for vertex in symbol.bounding_box.vertices
                                ]
                            }
                            bounds.append(bound_dict)

                            # Create flat bounds with adjusted x position
                            bounds_flat_dict = {
                                "vertices": [
                                    {"x": current_x_position, "y": Y},
                                    {"x": current_x_position + symbol_width, "y": Y}
                                ]
                            }
                            bounds_flat.append(bounds_flat_dict)
                            current_x_position += symbol_width

                            height_flat.append(avg_H)
                            confidences.append(round(symbol.confidence, 4))

                            characters_ind.append(symbol.text)
                            characters.append(symbol.text)

                        words_in_para.append(''.join(characters_ind))
                    paragraph_text = ' '.join(words_in_para)  # Join words in paragraph
                    organized_text += paragraph_text + ' '  # + '\n'

        # median_height = statistics.median(height_flat) if height_flat else 0
        # median_heights = [median_height] * len(characters)

        self.normal_cleaned_text = texts[0].description if texts else ''
        self.normal_organized_text = organized_text
        self.normal_bounds = bounds
        self.normal_bounds_word = bounds_word
        self.normal_text_to_box_mapping = text_to_box_mapping
        self.normal_bounds_flat = bounds_flat
        # self.normal_height = median_heights  # height_flat
        self.normal_height = height_flat
        self.normal_confidences = confidences
        self.normal_characters = characters
        return self.normal_cleaned_text

    def detect_handwritten_ocr(self):

        with open(self.path, "rb") as image_file:
            content = image_file.read()

        image = vision_beta.Image(content=content)
        image_context = vision_beta.ImageContext(language_hints=["en-t-i0-handwrit"])
        response = self.client_beta.document_text_detection(image=image, image_context=image_context)
        texts = response.text_annotations

        if response.error.message:
            raise Exception(
                "{}\nFor more info on error messages, check: "
                "https://cloud.google.com/apis/design/errors".format(response.error.message)
            )

        bounds = []
        bounds_word = []
        bounds_flat = []
        height_flat = []
        confidences = []
        characters = []
        organized_text = ""
        paragraph_count = 0
        text_to_box_mapping = []

        for text in texts[1:]:
            vertices = [{"x": vertex.x, "y": vertex.y} for vertex in text.bounding_poly.vertices]
            map_dict = {
                "vertices": vertices,
                "text": text.description
            }
            text_to_box_mapping.append(map_dict)

        for page in response.full_text_annotation.pages:
            for block in page.blocks:
                # paragraph_count += 1
                # organized_text += f'\nOCR_paragraph_{paragraph_count}:\n'  # Add paragraph label
                for paragraph in block.paragraphs:

                    avg_H_list = []
                    for word in paragraph.words:
                        Yw = max(vertex.y for vertex in word.bounding_box.vertices)
                        # Calculate the width of the word and divide by the number of symbols
                        word_length = max(vertex.x for vertex in word.bounding_box.vertices) - min(vertex.x for vertex in word.bounding_box.vertices)
                        num_symbols = len(word.symbols)
                        if num_symbols <= 3:
                            H = int(Yw - min(vertex.y for vertex in word.bounding_box.vertices))
                        else:
                            Yo = Yw - min(vertex.y for vertex in word.bounding_box.vertices)
                            X = word_length / num_symbols if num_symbols > 0 else 0
                            H = int(X + (Yo * 0.1))
                        avg_H_list.append(H)
                    avg_H = int(mean(avg_H_list))

                    words_in_para = []
                    for word in paragraph.words:
                        # Get word-level bounding box
                        bound_word_dict = {
                            "vertices": [
                                {"x": vertex.x, "y": vertex.y} for vertex in word.bounding_box.vertices
                            ]
                        }
                        bounds_word.append(bound_word_dict)

                        Y = max(vertex.y for vertex in word.bounding_box.vertices)
                        word_x_start = min(vertex.x for vertex in word.bounding_box.vertices)
                        word_x_end = max(vertex.x for vertex in word.bounding_box.vertices)
                        num_symbols = len(word.symbols)
                        symbol_width = (word_x_end - word_x_start) / num_symbols if num_symbols > 0 else 0

                        current_x_position = word_x_start

                        characters_ind = []
                        for symbol in word.symbols:
                            bound_dict = {
                                "vertices": [
                                    {"x": vertex.x, "y": vertex.y} for vertex in symbol.bounding_box.vertices
                                ]
                            }
                            bounds.append(bound_dict)

                            # Create flat bounds with adjusted x position
                            bounds_flat_dict = {
                                "vertices": [
                                    {"x": current_x_position, "y": Y},
                                    {"x": current_x_position + symbol_width, "y": Y}
                                ]
                            }
                            bounds_flat.append(bounds_flat_dict)
                            current_x_position += symbol_width

                            height_flat.append(avg_H)
                            confidences.append(round(symbol.confidence, 4))

                            characters_ind.append(symbol.text)
                            characters.append(symbol.text)

                        words_in_para.append(''.join(characters_ind))
                    paragraph_text = ' '.join(words_in_para)  # Join words in paragraph
                    organized_text += paragraph_text + ' '  # + '\n'

        # median_height = statistics.median(height_flat) if height_flat else 0
        # median_heights = [median_height] * len(characters)

        self.hand_cleaned_text = response.text_annotations[0].description if response.text_annotations else ''
        self.hand_organized_text = organized_text
        self.hand_bounds = bounds
        self.hand_bounds_word = bounds_word
        self.hand_bounds_flat = bounds_flat
        self.hand_text_to_box_mapping = text_to_box_mapping
        # self.hand_height = median_heights  # height_flat
        self.hand_height = height_flat
        self.hand_confidences = confidences
        self.hand_characters = characters
        return self.hand_cleaned_text

    def process_image(self, do_create_OCR_helper_image, logger):
        # Can stack options, so solitary if statements
        self.OCR = 'OCR:\n'
        if 'CRAFT' in self.OCR_option:
            self.do_use_trOCR = True
            self.detect_text_craft()
            ### Optionally add trOCR to the self.OCR for additional context
            if self.double_OCR:
                part_OCR = "\nCRAFT trOCR:\n" + self.detect_text_with_trOCR_using_google_bboxes(self.do_use_trOCR, logger)
                self.OCR = self.OCR + part_OCR + part_OCR
            else:
                self.OCR = self.OCR + "\nCRAFT trOCR:\n" + self.detect_text_with_trOCR_using_google_bboxes(self.do_use_trOCR, logger)
            # logger.info(f"CRAFT trOCR:\n{self.OCR}")

        if 'LLaVA' in self.OCR_option:  # This option does not produce an OCR helper image
            if self.json_report:
                self.json_report.set_text(text_main=f'Working on LLaVA {self.Llava.model_path} transcription :construction:')

            image, json_output, direct_output, str_output, usage_report = self.Llava.transcribe_image(self.path, self.multimodal_prompt)
            self.logger.info(f"LLaVA Usage Report for Model {self.Llava.model_path}:\n{usage_report}")

            try:
                self.OCR_JSON_to_file['OCR_LLaVA'] = str_output
            except:
                self.OCR_JSON_to_file = {}
                self.OCR_JSON_to_file['OCR_LLaVA'] = str_output

            if self.double_OCR:
                self.OCR = self.OCR + f"\nLLaVA OCR:\n{str_output}" + f"\nLLaVA OCR:\n{str_output}"
            else:
                self.OCR = self.OCR + f"\nLLaVA OCR:\n{str_output}"
            # logger.info(f"LLaVA OCR:\n{self.OCR}")

        if 'normal' in self.OCR_option or 'hand' in self.OCR_option:
            if 'normal' in self.OCR_option:
                if self.double_OCR:
                    part_OCR = self.OCR + "\nGoogle Printed OCR:\n" + self.detect_text()
                    self.OCR = self.OCR + part_OCR + part_OCR
                else:
                    self.OCR = self.OCR + "\nGoogle Printed OCR:\n" + self.detect_text()
            if 'hand' in self.OCR_option:
                if self.double_OCR:
                    part_OCR = self.OCR + "\nGoogle Handwritten OCR:\n" + self.detect_handwritten_ocr()
                    self.OCR = self.OCR + part_OCR + part_OCR
                else:
                    self.OCR = self.OCR + "\nGoogle Handwritten OCR:\n" + self.detect_handwritten_ocr()
            # if self.OCR_option not in ['normal', 'hand', 'both']:
            #     self.OCR_option = 'both'
            #     self.detect_text()
            #     self.detect_handwritten_ocr()

            ### Optionally add trOCR to the self.OCR for additional context
            if self.do_use_trOCR:
                if self.double_OCR:
                    part_OCR = "\ntrOCR:\n" + self.detect_text_with_trOCR_using_google_bboxes(self.do_use_trOCR, logger)
                    self.OCR = self.OCR + part_OCR + part_OCR
                else:
                    self.OCR = self.OCR + "\ntrOCR:\n" + self.detect_text_with_trOCR_using_google_bboxes(self.do_use_trOCR, logger)
                # logger.info(f"OCR:\n{self.OCR}")
            else:
                # populate self.OCR_JSON_to_file = {}
                _ = self.detect_text_with_trOCR_using_google_bboxes(self.do_use_trOCR, logger)

        if do_create_OCR_helper_image and ('LLaVA' not in self.OCR_option):
            self.image = Image.open(self.path)

            if 'normal' in self.OCR_option:
                image_with_boxes_normal = self.draw_boxes('normal')
                text_image_normal = self.render_text_on_black_image('normal')
                self.merged_image_normal = self.merge_images(image_with_boxes_normal, text_image_normal)

            if 'hand' in self.OCR_option:
                image_with_boxes_hand = self.draw_boxes('hand')
                text_image_hand = self.render_text_on_black_image('hand')
                self.merged_image_hand = self.merge_images(image_with_boxes_hand, text_image_hand)

            if self.do_use_trOCR:
                text_image_trOCR = self.render_text_on_black_image('trOCR')

            if 'CRAFT' in self.OCR_option:
                image_with_boxes_normal = self.draw_boxes('normal')
                self.merged_image_normal = self.merge_images(image_with_boxes_normal, text_image_trOCR)

            ### Merge final overlay image
            ### [original, normal bboxes, normal text]
            if 'CRAFT' in self.OCR_option or 'normal' in self.OCR_option:
                self.overlay_image = self.merge_images(Image.open(self.path), self.merged_image_normal)
            ### [original, hand bboxes, hand text]
            elif 'hand' in self.OCR_option:
                self.overlay_image = self.merge_images(Image.open(self.path), self.merged_image_hand)
            ### [original, normal bboxes, normal text, hand bboxes, hand text]
            else:
                self.overlay_image = self.merge_images(Image.open(self.path), self.merge_images(self.merged_image_normal, self.merged_image_hand))

            if self.do_use_trOCR:
                if 'CRAFT' in self.OCR_option:
                    heat_map_text = Image.fromarray(cv2.cvtColor(self.prediction_result["heatmaps"]["text_score_heatmap"], cv2.COLOR_BGR2RGB))
                    heat_map_link = Image.fromarray(cv2.cvtColor(self.prediction_result["heatmaps"]["link_score_heatmap"], cv2.COLOR_BGR2RGB))
                    self.overlay_image = self.merge_images(self.overlay_image, heat_map_text)
                    self.overlay_image = self.merge_images(self.overlay_image, heat_map_link)
                else:
                    self.overlay_image = self.merge_images(self.overlay_image, text_image_trOCR)

        else:
            self.merged_image_normal = None
            self.merged_image_hand = None
            self.overlay_image = Image.open(self.path)

        try:
            from craft_text_detector import empty_cuda_cache
            empty_cuda_cache()
        except:
            pass

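# process_image() treats OCR_option as a stackable collection; the branches above
# recognize 'CRAFT', 'LLaVA', 'normal', and 'hand'. A hypothetical config sketch
# (key names follow the constructor above; values are illustrative only):
#
#     cfg['leafmachine']['project']['OCR_option'] = ['normal', 'hand']
#     cfg['leafmachine']['project']['do_use_trOCR'] = True
#     cfg['leafmachine']['project']['double_OCR'] = False

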
class SafetyCheck():
    def __init__(self, is_hf) -> None:
        self.is_hf = is_hf
        self.set_client()

    def set_client(self):
        if self.is_hf:
            self.client = vision.ImageAnnotatorClient(credentials=self.get_google_credentials())
        else:
            self.client = vision.ImageAnnotatorClient(credentials=self.get_google_credentials())

    def get_google_credentials(self):
        creds_json_str = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
        credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
        return credentials

    def check_for_inappropriate_content(self, file_stream):
        try:
            LEVEL = 2
            # content = file_stream.read()
            file_stream.seek(0)  # Reset file stream position to the beginning
            content = file_stream.read()
            image = vision.Image(content=content)
            response = self.client.safe_search_detection(image=image)
            safe = response.safe_search_annotation

            likelihood_name = (
                "UNKNOWN",
                "VERY_UNLIKELY",
                "UNLIKELY",
                "POSSIBLE",
                "LIKELY",
                "VERY_LIKELY",
            )
            print("Safe search:")

            print(f" adult*: {likelihood_name[safe.adult]}")
            print(f" medical*: {likelihood_name[safe.medical]}")
            print(f" spoofed: {likelihood_name[safe.spoof]}")
            print(f" violence*: {likelihood_name[safe.violence]}")
            print(f" racy: {likelihood_name[safe.racy]}")

            # Check the levels of adult, violence, racy, etc. content.
            if (safe.adult > LEVEL or
                safe.medical > LEVEL or
                # safe.spoof > LEVEL or
                safe.violence > LEVEL  # or
                # safe.racy > LEVEL
                ):
                print("Found violation")
                return True  # The image violates safe search guidelines.

            print("Found NO violation")
            return False  # The image is considered safe.
        except:
            return False  # The image is considered safe. TEMPORARY FIX TODO
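
For reference, a minimal, hypothetical sketch of driving the deleted OCREngine class; the TrOCR checkpoint name, file paths, and config values below are assumptions for illustration, not part of this commit:

# Hypothetical usage sketch (arguments follow the OCREngine constructor above).
import logging
import torch
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

logger = logging.getLogger('vouchervision')
device = 'cuda' if torch.cuda.is_available() else 'cpu'
trOCR_version = 'microsoft/trocr-base-handwritten'  # assumed checkpoint
trOCR_processor = TrOCRProcessor.from_pretrained(trOCR_version)
trOCR_model = VisionEncoderDecoderModel.from_pretrained(trOCR_version).to(device)

cfg = {'leafmachine': {'project': {
    'do_use_trOCR': True,
    'OCR_option': ['normal', 'hand'],
    'double_OCR': False,
}}}

engine = OCREngine(logger, None, '/path/to/dir_home', False, '/path/to/label.jpg',
                   cfg, trOCR_version, trOCR_model, trOCR_processor, device)
engine.process_image(do_create_OCR_helper_image=True, logger=logger)
print(engine.OCR)                             # combined OCR text assembled by process_image
engine.overlay_image.save('ocr_overlay.jpg')  # helper image built from the detected boxes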