phyloforfun committed on
Commit
70768ef
2 Parent(s): c5e57d6 3ec1214

Merge branch 'main' of https://huggingface.co/spaces/phyloforfun/VoucherVision

Browse files
Files changed (2) hide show
  1. app.py +17 -14
  2. vouchervision/OCR_google_cloud_vision.py +39 -35
app.py CHANGED
@@ -218,10 +218,10 @@ if 'dir_images_local_TEMP' not in st.session_state:
218
  st.session_state['dir_images_local_TEMP'] = False
219
  if 'dir_uploaded_images' not in st.session_state:
220
  st.session_state['dir_uploaded_images'] = os.path.join(st.session_state.dir_home,'uploads')
221
- validate_dir(os.path.join(st.session_state.dir_home,'uploads'))
222
  if 'dir_uploaded_images_small' not in st.session_state:
223
  st.session_state['dir_uploaded_images_small'] = os.path.join(st.session_state.dir_home,'uploads_small')
224
- validate_dir(os.path.join(st.session_state.dir_home,'uploads_small'))
225
 
226
 
227
 
@@ -264,18 +264,16 @@ def handle_image_upload_and_gallery_hf(uploaded_files):
264
 
265
  ind_small = 0
266
  for uploaded_file in uploaded_files:
267
-
268
  if SAFE.check_for_inappropriate_content(uploaded_file):
269
  clear_image_uploads()
270
  report_violation(uploaded_file.name, is_hf=st.session_state['is_hf'])
271
  st.error("Warning: You uploaded an image that violates our terms of service.")
272
- return True
273
 
274
 
275
  # Determine the file type
276
  if uploaded_file.name.lower().endswith('.pdf'):
277
  # Handle PDF files
278
- file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
279
  # Convert each page of the PDF to an image
280
  n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)#st.session_state.config['leafmachine']['project']['dir_images_local'])
281
  # Update the input list for each page image
@@ -290,22 +288,27 @@ def handle_image_upload_and_gallery_hf(uploaded_files):
290
  # Optionally, create a thumbnail for the gallery
291
  img = Image.open(jpg_file_path)
292
  img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
293
- try:
294
  file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file_name, img)
295
- except:
296
  file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images_small'],st.session_state['dir_uploaded_images_small'], file_name, img)
297
  st.session_state['input_list_small'].append(file_path_small)
298
 
299
  else:
300
  ind_small += 1
301
  # Handle JPG/JPEG files (existing process)
302
- file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
 
 
 
 
 
303
  st.session_state['input_list'].append(file_path)
304
- if ind_small < MAX_GALLERY_IMAGES +5:
305
- img = Image.open(file_path)
306
- img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
307
- file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
308
- st.session_state['input_list_small'].append(file_path_small)
309
 
310
  # After processing all files
311
  st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
@@ -393,7 +396,7 @@ def content_input_images(col_left, col_right):
393
 
394
  with col_right:
395
  if st.session_state.is_hf:
396
- result = handle_image_upload_and_gallery_hf(uploaded_files)
397
 
398
  else:
399
  st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
 
218
  st.session_state['dir_images_local_TEMP'] = False
219
  if 'dir_uploaded_images' not in st.session_state:
220
  st.session_state['dir_uploaded_images'] = os.path.join(st.session_state.dir_home,'uploads')
221
+ validate_dir(st.session_state['dir_uploaded_images'])
222
  if 'dir_uploaded_images_small' not in st.session_state:
223
  st.session_state['dir_uploaded_images_small'] = os.path.join(st.session_state.dir_home,'uploads_small')
224
+ validate_dir(st.session_state['dir_uploaded_images_small'])
225
 
226
 
227
 
 
264
 
265
  ind_small = 0
266
  for uploaded_file in uploaded_files:
 
267
  if SAFE.check_for_inappropriate_content(uploaded_file):
268
  clear_image_uploads()
269
  report_violation(uploaded_file.name, is_hf=st.session_state['is_hf'])
270
  st.error("Warning: You uploaded an image that violates our terms of service.")
 
271
 
272
 
273
  # Determine the file type
274
  if uploaded_file.name.lower().endswith('.pdf'):
275
  # Handle PDF files
276
+ file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file, image=None)
277
  # Convert each page of the PDF to an image
278
  n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)#st.session_state.config['leafmachine']['project']['dir_images_local'])
279
  # Update the input list for each page image
 
288
  # Optionally, create a thumbnail for the gallery
289
  img = Image.open(jpg_file_path)
290
  img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
291
+ if st.session_state['is_hf']:
292
  file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file_name, img)
293
+ else:
294
  file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images_small'],st.session_state['dir_uploaded_images_small'], file_name, img)
295
  st.session_state['input_list_small'].append(file_path_small)
296
 
297
  else:
298
  ind_small += 1
299
  # Handle JPG/JPEG files (existing process)
300
+ # file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file, image=None) ######### Yale TODO
301
+ # file_path = os.path.join(st.session_state['dir_uploaded_images'], uploaded_file.name)
302
+ image = Image.open(uploaded_file)
303
+ file_path = os.path.join(st.session_state['dir_uploaded_images'], uploaded_file.name)
304
+ image.save(file_path, "JPEG")
305
+
306
  st.session_state['input_list'].append(file_path)
307
+ # if ind_small < MAX_GALLERY_IMAGES +5:
308
+ # img = Image.open(file_path)
309
+ # img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
310
+ # file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
311
+ # st.session_state['input_list_small'].append(file_path_small)
312
 
313
  # After processing all files
314
  st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
 
396
 
397
  with col_right:
398
  if st.session_state.is_hf:
399
+ handle_image_upload_and_gallery_hf(uploaded_files)
400
 
401
  else:
402
  st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
vouchervision/OCR_google_cloud_vision.py CHANGED
@@ -824,44 +824,48 @@ class SafetyCheck():
824
  else:
825
  self.client = vision.ImageAnnotatorClient(credentials=self.get_google_credentials())
826
 
827
-
828
  def get_google_credentials(self):
829
  creds_json_str = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
830
  credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
831
  return credentials
832
 
833
  def check_for_inappropriate_content(self, file_stream):
834
- LEVEL = 2
835
- content = file_stream.read()
836
- image = vision.Image(content=content)
837
- response = self.client.safe_search_detection(image=image)
838
- safe = response.safe_search_annotation
839
-
840
- likelihood_name = (
841
- "UNKNOWN",
842
- "VERY_UNLIKELY",
843
- "UNLIKELY",
844
- "POSSIBLE",
845
- "LIKELY",
846
- "VERY_LIKELY",
847
- )
848
- print("Safe search:")
849
-
850
- print(f" adult*: {likelihood_name[safe.adult]}")
851
- print(f" medical*: {likelihood_name[safe.medical]}")
852
- print(f" spoofed: {likelihood_name[safe.spoof]}")
853
- print(f" violence*: {likelihood_name[safe.violence]}")
854
- print(f" racy: {likelihood_name[safe.racy]}")
855
-
856
- # Check the levels of adult, violence, racy, etc. content.
857
- if (safe.adult > LEVEL or
858
- safe.medical > LEVEL or
859
- # safe.spoof > LEVEL or
860
- safe.violence > LEVEL #or
861
- # safe.racy > LEVEL
862
- ):
863
- print("Found violation")
864
- return True # The image violates safe search guidelines.
865
-
866
- print("Found NO violation")
867
- return False # The image is considered safe.
 
 
 
 
 
 
824
  else:
825
  self.client = vision.ImageAnnotatorClient(credentials=self.get_google_credentials())
826
 
 
827
  def get_google_credentials(self):
828
  creds_json_str = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
829
  credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
830
  return credentials
831
 
832
  def check_for_inappropriate_content(self, file_stream):
833
+ try:
834
+ LEVEL = 2
835
+ # content = file_stream.read()
836
+ file_stream.seek(0) # Reset file stream position to the beginning
837
+ content = file_stream.read()
838
+ image = vision.Image(content=content)
839
+ response = self.client.safe_search_detection(image=image)
840
+ safe = response.safe_search_annotation
841
+
842
+ likelihood_name = (
843
+ "UNKNOWN",
844
+ "VERY_UNLIKELY",
845
+ "UNLIKELY",
846
+ "POSSIBLE",
847
+ "LIKELY",
848
+ "VERY_LIKELY",
849
+ )
850
+ print("Safe search:")
851
+
852
+ print(f" adult*: {likelihood_name[safe.adult]}")
853
+ print(f" medical*: {likelihood_name[safe.medical]}")
854
+ print(f" spoofed: {likelihood_name[safe.spoof]}")
855
+ print(f" violence*: {likelihood_name[safe.violence]}")
856
+ print(f" racy: {likelihood_name[safe.racy]}")
857
+
858
+ # Check the levels of adult, violence, racy, etc. content.
859
+ if (safe.adult > LEVEL or
860
+ safe.medical > LEVEL or
861
+ # safe.spoof > LEVEL or
862
+ safe.violence > LEVEL #or
863
+ # safe.racy > LEVEL
864
+ ):
865
+ print("Found violation")
866
+ return True # The image violates safe search guidelines.
867
+
868
+ print("Found NO violation")
869
+ return False # The image is considered safe.
870
+ except:
871
+ return False # The image is considered safe. TEMPOROARY FIX TODO