Spaces:

phyloforfun
/

VoucherVision

Running

App Files Community

phyloforfun committed on Jul 18, 2024

Commit

70768ef

2 Parent(s): c5e57d6 3ec1214

Merge branch 'main' of https://huggingface.co/spaces/phyloforfun/VoucherVision

Browse files

Files changed (2) hide show

app.py +17 -14
vouchervision/OCR_google_cloud_vision.py +39 -35

app.py CHANGED Viewed

@@ -218,10 +218,10 @@ if 'dir_images_local_TEMP' not in st.session_state:
     st.session_state['dir_images_local_TEMP'] = False
 if 'dir_uploaded_images' not in st.session_state:
     st.session_state['dir_uploaded_images'] = os.path.join(st.session_state.dir_home,'uploads')
-    validate_dir(os.path.join(st.session_state.dir_home,'uploads'))
 if 'dir_uploaded_images_small' not in st.session_state:
     st.session_state['dir_uploaded_images_small'] = os.path.join(st.session_state.dir_home,'uploads_small')
-    validate_dir(os.path.join(st.session_state.dir_home,'uploads_small'))
@@ -264,18 +264,16 @@ def handle_image_upload_and_gallery_hf(uploaded_files):
         ind_small = 0
         for uploaded_file in uploaded_files:
             if SAFE.check_for_inappropriate_content(uploaded_file):
                 clear_image_uploads()
                 report_violation(uploaded_file.name, is_hf=st.session_state['is_hf'])
                 st.error("Warning: You uploaded an image that violates our terms of service.")
-                return True
             # Determine the file type
             if uploaded_file.name.lower().endswith('.pdf'):
                 # Handle PDF files
-                file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
                 # Convert each page of the PDF to an image
                 n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)#st.session_state.config['leafmachine']['project']['dir_images_local'])
                 # Update the input list for each page image
@@ -290,22 +288,27 @@ def handle_image_upload_and_gallery_hf(uploaded_files):
                             # Optionally, create a thumbnail for the gallery
                             img = Image.open(jpg_file_path)
                             img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
-                            try:
                                 file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file_name, img)
-                            except:
                                 file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images_small'],st.session_state['dir_uploaded_images_small'], file_name, img)
                             st.session_state['input_list_small'].append(file_path_small)
             else:
                 ind_small += 1
                 # Handle JPG/JPEG files (existing process)
-                file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
                 st.session_state['input_list'].append(file_path)
-                if ind_small < MAX_GALLERY_IMAGES +5:
-                    img = Image.open(file_path)
-                    img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
-                    file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
-                    st.session_state['input_list_small'].append(file_path_small)
         # After processing all files
         st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
@@ -393,7 +396,7 @@ def content_input_images(col_left, col_right):
     with col_right:
         if st.session_state.is_hf:
-            result = handle_image_upload_and_gallery_hf(uploaded_files)
         else:
             st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)

     st.session_state['dir_images_local_TEMP'] = False
 if 'dir_uploaded_images' not in st.session_state:
     st.session_state['dir_uploaded_images'] = os.path.join(st.session_state.dir_home,'uploads')
+    validate_dir(st.session_state['dir_uploaded_images'])
 if 'dir_uploaded_images_small' not in st.session_state:
     st.session_state['dir_uploaded_images_small'] = os.path.join(st.session_state.dir_home,'uploads_small')
+    validate_dir(st.session_state['dir_uploaded_images_small'])
         ind_small = 0
         for uploaded_file in uploaded_files:
             if SAFE.check_for_inappropriate_content(uploaded_file):
                 clear_image_uploads()
                 report_violation(uploaded_file.name, is_hf=st.session_state['is_hf'])
                 st.error("Warning: You uploaded an image that violates our terms of service.")
             # Determine the file type
             if uploaded_file.name.lower().endswith('.pdf'):
                 # Handle PDF files
+                file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file, image=None)
                 # Convert each page of the PDF to an image
                 n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)#st.session_state.config['leafmachine']['project']['dir_images_local'])
                 # Update the input list for each page image
                             # Optionally, create a thumbnail for the gallery
                             img = Image.open(jpg_file_path)
                             img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
+                            if st.session_state['is_hf']:
                                 file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file_name, img)
+                            else:
                                 file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images_small'],st.session_state['dir_uploaded_images_small'], file_name, img)
                             st.session_state['input_list_small'].append(file_path_small)
             else:
                 ind_small += 1
                 # Handle JPG/JPEG files (existing process)
+                # file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file, image=None) ######### Yale  TODO
+                # file_path = os.path.join(st.session_state['dir_uploaded_images'], uploaded_file.name)
+                image = Image.open(uploaded_file)
+                file_path = os.path.join(st.session_state['dir_uploaded_images'], uploaded_file.name)
+                image.save(file_path, "JPEG")
                 st.session_state['input_list'].append(file_path)
+                # if ind_small < MAX_GALLERY_IMAGES +5:
+                #     img = Image.open(file_path)
+                #     img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
+                #     file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
+                #     st.session_state['input_list_small'].append(file_path_small)
         # After processing all files
         st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
     with col_right:
         if st.session_state.is_hf:
+            handle_image_upload_and_gallery_hf(uploaded_files)
         else:
             st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)

vouchervision/OCR_google_cloud_vision.py CHANGED Viewed

@@ -824,44 +824,48 @@ class SafetyCheck():
         else:
             self.client = vision.ImageAnnotatorClient(credentials=self.get_google_credentials())
     def get_google_credentials(self):
         creds_json_str = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
         credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
         return credentials
     def check_for_inappropriate_content(self, file_stream):
-        LEVEL = 2
-        content = file_stream.read()
-        image = vision.Image(content=content)
-        response = self.client.safe_search_detection(image=image)
-        safe = response.safe_search_annotation
-        likelihood_name = (
-            "UNKNOWN",
-            "VERY_UNLIKELY",
-            "UNLIKELY",
-            "POSSIBLE",
-            "LIKELY",
-            "VERY_LIKELY",
-        )
-        print("Safe search:")
-        print(f"    adult*: {likelihood_name[safe.adult]}")
-        print(f"    medical*: {likelihood_name[safe.medical]}")
-        print(f"    spoofed: {likelihood_name[safe.spoof]}")
-        print(f"    violence*: {likelihood_name[safe.violence]}")
-        print(f"    racy: {likelihood_name[safe.racy]}")
-        # Check the levels of adult, violence, racy, etc. content.
-        if (safe.adult > LEVEL or
-            safe.medical > LEVEL or
-            # safe.spoof > LEVEL or
-            safe.violence > LEVEL #or
-            # safe.racy > LEVEL
-            ):
-            print("Found violation")
-            return True  # The image violates safe search guidelines.
-        print("Found NO violation")
-        return False  # The image is considered safe.

         else:
             self.client = vision.ImageAnnotatorClient(credentials=self.get_google_credentials())
     def get_google_credentials(self):
         creds_json_str = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
         credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
         return credentials
     def check_for_inappropriate_content(self, file_stream):
+        try:
+            LEVEL = 2
+            # content = file_stream.read()
+            file_stream.seek(0)  # Reset file stream position to the beginning
+            content = file_stream.read()
+            image = vision.Image(content=content)
+            response = self.client.safe_search_detection(image=image)
+            safe = response.safe_search_annotation
+            likelihood_name = (
+                "UNKNOWN",
+                "VERY_UNLIKELY",
+                "UNLIKELY",
+                "POSSIBLE",
+                "LIKELY",
+                "VERY_LIKELY",
+            )
+            print("Safe search:")
+            print(f"    adult*: {likelihood_name[safe.adult]}")
+            print(f"    medical*: {likelihood_name[safe.medical]}")
+            print(f"    spoofed: {likelihood_name[safe.spoof]}")
+            print(f"    violence*: {likelihood_name[safe.violence]}")
+            print(f"    racy: {likelihood_name[safe.racy]}")
+            # Check the levels of adult, violence, racy, etc. content.
+            if (safe.adult > LEVEL or
+                safe.medical > LEVEL or
+                # safe.spoof > LEVEL or
+                safe.violence > LEVEL #or
+                # safe.racy > LEVEL
+                ):
+                print("Found violation")
+                return True  # The image violates safe search guidelines.
+            print("Found NO violation")
+            return False  # The image is considered safe.
+        except:
+            return False  # Any error is treated as "safe" (the check fails open). TEMPORARY FIX TODO