Spaces:
Running
Running
phyloforfun
committed on
Merge branch 'main' of https://huggingface.co/spaces/phyloforfun/VoucherVision
Browse files
- app.py +17 -14
- vouchervision/OCR_google_cloud_vision.py +39 -35
app.py
CHANGED
@@ -218,10 +218,10 @@ if 'dir_images_local_TEMP' not in st.session_state:
|
|
218 |
st.session_state['dir_images_local_TEMP'] = False
|
219 |
if 'dir_uploaded_images' not in st.session_state:
|
220 |
st.session_state['dir_uploaded_images'] = os.path.join(st.session_state.dir_home,'uploads')
|
221 |
-
validate_dir(
|
222 |
if 'dir_uploaded_images_small' not in st.session_state:
|
223 |
st.session_state['dir_uploaded_images_small'] = os.path.join(st.session_state.dir_home,'uploads_small')
|
224 |
-
validate_dir(
|
225 |
|
226 |
|
227 |
|
@@ -264,18 +264,16 @@ def handle_image_upload_and_gallery_hf(uploaded_files):
|
|
264 |
|
265 |
ind_small = 0
|
266 |
for uploaded_file in uploaded_files:
|
267 |
-
|
268 |
if SAFE.check_for_inappropriate_content(uploaded_file):
|
269 |
clear_image_uploads()
|
270 |
report_violation(uploaded_file.name, is_hf=st.session_state['is_hf'])
|
271 |
st.error("Warning: You uploaded an image that violates our terms of service.")
|
272 |
-
return True
|
273 |
|
274 |
|
275 |
# Determine the file type
|
276 |
if uploaded_file.name.lower().endswith('.pdf'):
|
277 |
# Handle PDF files
|
278 |
-
file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
|
279 |
# Convert each page of the PDF to an image
|
280 |
n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)#st.session_state.config['leafmachine']['project']['dir_images_local'])
|
281 |
# Update the input list for each page image
|
@@ -290,22 +288,27 @@ def handle_image_upload_and_gallery_hf(uploaded_files):
|
|
290 |
# Optionally, create a thumbnail for the gallery
|
291 |
img = Image.open(jpg_file_path)
|
292 |
img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
|
293 |
-
|
294 |
file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file_name, img)
|
295 |
-
|
296 |
file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images_small'],st.session_state['dir_uploaded_images_small'], file_name, img)
|
297 |
st.session_state['input_list_small'].append(file_path_small)
|
298 |
|
299 |
else:
|
300 |
ind_small += 1
|
301 |
# Handle JPG/JPEG files (existing process)
|
302 |
-
file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
|
|
|
|
|
|
|
|
|
|
|
303 |
st.session_state['input_list'].append(file_path)
|
304 |
-
if ind_small < MAX_GALLERY_IMAGES +5:
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
|
310 |
# After processing all files
|
311 |
st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
|
@@ -393,7 +396,7 @@ def content_input_images(col_left, col_right):
|
|
393 |
|
394 |
with col_right:
|
395 |
if st.session_state.is_hf:
|
396 |
-
|
397 |
|
398 |
else:
|
399 |
st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
|
|
|
218 |
st.session_state['dir_images_local_TEMP'] = False
|
219 |
if 'dir_uploaded_images' not in st.session_state:
|
220 |
st.session_state['dir_uploaded_images'] = os.path.join(st.session_state.dir_home,'uploads')
|
221 |
+
validate_dir(st.session_state['dir_uploaded_images'])
|
222 |
if 'dir_uploaded_images_small' not in st.session_state:
|
223 |
st.session_state['dir_uploaded_images_small'] = os.path.join(st.session_state.dir_home,'uploads_small')
|
224 |
+
validate_dir(st.session_state['dir_uploaded_images_small'])
|
225 |
|
226 |
|
227 |
|
|
|
264 |
|
265 |
ind_small = 0
|
266 |
for uploaded_file in uploaded_files:
|
|
|
267 |
if SAFE.check_for_inappropriate_content(uploaded_file):
|
268 |
clear_image_uploads()
|
269 |
report_violation(uploaded_file.name, is_hf=st.session_state['is_hf'])
|
270 |
st.error("Warning: You uploaded an image that violates our terms of service.")
|
|
|
271 |
|
272 |
|
273 |
# Determine the file type
|
274 |
if uploaded_file.name.lower().endswith('.pdf'):
|
275 |
# Handle PDF files
|
276 |
+
file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file, image=None)
|
277 |
# Convert each page of the PDF to an image
|
278 |
n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)#st.session_state.config['leafmachine']['project']['dir_images_local'])
|
279 |
# Update the input list for each page image
|
|
|
288 |
# Optionally, create a thumbnail for the gallery
|
289 |
img = Image.open(jpg_file_path)
|
290 |
img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
|
291 |
+
if st.session_state['is_hf']:
|
292 |
file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file_name, img)
|
293 |
+
else:
|
294 |
file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images_small'],st.session_state['dir_uploaded_images_small'], file_name, img)
|
295 |
st.session_state['input_list_small'].append(file_path_small)
|
296 |
|
297 |
else:
|
298 |
ind_small += 1
|
299 |
# Handle JPG/JPEG files (existing process)
|
300 |
+
# file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file, image=None) ######### Yale TODO
|
301 |
+
# file_path = os.path.join(st.session_state['dir_uploaded_images'], uploaded_file.name)
|
302 |
+
image = Image.open(uploaded_file)
|
303 |
+
file_path = os.path.join(st.session_state['dir_uploaded_images'], uploaded_file.name)
|
304 |
+
image.save(file_path, "JPEG")
|
305 |
+
|
306 |
st.session_state['input_list'].append(file_path)
|
307 |
+
# if ind_small < MAX_GALLERY_IMAGES +5:
|
308 |
+
# img = Image.open(file_path)
|
309 |
+
# img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
|
310 |
+
# file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
|
311 |
+
# st.session_state['input_list_small'].append(file_path_small)
|
312 |
|
313 |
# After processing all files
|
314 |
st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
|
|
|
396 |
|
397 |
with col_right:
|
398 |
if st.session_state.is_hf:
|
399 |
+
handle_image_upload_and_gallery_hf(uploaded_files)
|
400 |
|
401 |
else:
|
402 |
st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
|
vouchervision/OCR_google_cloud_vision.py
CHANGED
@@ -824,44 +824,48 @@ class SafetyCheck():
|
|
824 |
else:
|
825 |
self.client = vision.ImageAnnotatorClient(credentials=self.get_google_credentials())
|
826 |
|
827 |
-
|
828 |
def get_google_credentials(self):
|
829 |
creds_json_str = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
|
830 |
credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
|
831 |
return credentials
|
832 |
|
833 |
def check_for_inappropriate_content(self, file_stream):
|
834 |
-
|
835 |
-
|
836 |
-
|
837 |
-
|
838 |
-
|
839 |
-
|
840 |
-
|
841 |
-
|
842 |
-
|
843 |
-
|
844 |
-
|
845 |
-
|
846 |
-
|
847 |
-
|
848 |
-
|
849 |
-
|
850 |
-
|
851 |
-
|
852 |
-
|
853 |
-
|
854 |
-
|
855 |
-
|
856 |
-
|
857 |
-
|
858 |
-
|
859 |
-
#
|
860 |
-
safe.
|
861 |
-
|
862 |
-
|
863 |
-
|
864 |
-
|
865 |
-
|
866 |
-
|
867 |
-
|
|
|
|
|
|
|
|
|
|
|
|
824 |
else:
|
825 |
self.client = vision.ImageAnnotatorClient(credentials=self.get_google_credentials())
|
826 |
|
|
|
827 |
def get_google_credentials(self):
|
828 |
creds_json_str = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
|
829 |
credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
|
830 |
return credentials
|
831 |
|
832 |
def check_for_inappropriate_content(self, file_stream):
|
833 |
+
try:
|
834 |
+
LEVEL = 2
|
835 |
+
# content = file_stream.read()
|
836 |
+
file_stream.seek(0) # Reset file stream position to the beginning
|
837 |
+
content = file_stream.read()
|
838 |
+
image = vision.Image(content=content)
|
839 |
+
response = self.client.safe_search_detection(image=image)
|
840 |
+
safe = response.safe_search_annotation
|
841 |
+
|
842 |
+
likelihood_name = (
|
843 |
+
"UNKNOWN",
|
844 |
+
"VERY_UNLIKELY",
|
845 |
+
"UNLIKELY",
|
846 |
+
"POSSIBLE",
|
847 |
+
"LIKELY",
|
848 |
+
"VERY_LIKELY",
|
849 |
+
)
|
850 |
+
print("Safe search:")
|
851 |
+
|
852 |
+
print(f" adult*: {likelihood_name[safe.adult]}")
|
853 |
+
print(f" medical*: {likelihood_name[safe.medical]}")
|
854 |
+
print(f" spoofed: {likelihood_name[safe.spoof]}")
|
855 |
+
print(f" violence*: {likelihood_name[safe.violence]}")
|
856 |
+
print(f" racy: {likelihood_name[safe.racy]}")
|
857 |
+
|
858 |
+
# Check the levels of adult, violence, racy, etc. content.
|
859 |
+
if (safe.adult > LEVEL or
|
860 |
+
safe.medical > LEVEL or
|
861 |
+
# safe.spoof > LEVEL or
|
862 |
+
safe.violence > LEVEL #or
|
863 |
+
# safe.racy > LEVEL
|
864 |
+
):
|
865 |
+
print("Found violation")
|
866 |
+
return True # The image violates safe search guidelines.
|
867 |
+
|
868 |
+
print("Found NO violation")
|
869 |
+
return False # The image is considered safe.
|
870 |
+
except:
|
871 |
+
return False # The image is considered safe. TEMPORARY FIX TODO
|