Spaces:

rogerxavier
/

moviepy_with_manga_test

Sleeping

App Files Files Community

rogerxavier committed on May 29, 2024

Commit

8f0c284

verified ·

1 Parent(s): 0930ceb

Create 0filterImage.py

Browse files

Files changed (1) hide show

0filterImage.py +53 -0

0filterImage.py ADDED Viewed

	@@ -0,0 +1,53 @@

+#通过modelscope接口对问题图片予以删除，保证过审
+import base64
+import json
+import os
+from io import BytesIO
+import pandas as pd
+from PIL import Image
+import requests
+def ocr(image):
+    image = Image.open(image)
+    img_buffer = BytesIO()
+    image.save(img_buffer, format=image.format)
+    byte_data = img_buffer.getvalue()
+    base64_bytes = base64.b64encode(byte_data)  # bytes
+    base64_str = base64_bytes.decode()
+    url = "https://www.modelscope.cn/api/v1/studio/damo/ofa_ocr_pipeline/gradio/api/predict/"
+    payload = json.dumps({
+        "data": [f"data:image/jpeg;base64,{base64_str}"],
+        "dataType": ["image"]
+    })
+    headers = {
+        'Content-Type': 'application/json'
+    }
+    response = requests.request("POST", url, headers=headers, data=payload)
+    jobj = json.loads(response.text)
+    return jobj
+if __name__ == '__main__':
+    # 获取当前目录的子目录的路径
+    img_path = 'manga'
+    subdir_path = os.path.join(os.getcwd(), img_path)
+    # 图片素材获取（包含子目录下所有图片）
+    image_files = []
+    for root, dirs, files in os.walk(subdir_path):
+        for file in files:
+            if file.endswith(".jpg") or file.endswith(".png"):
+                image_files.append(os.path.relpath(os.path.join(root, file)))
+    for image_path in image_files:
+        result = ocr(image_path) ##dataframe格式  有两列  boxid 和text
+        if 'error' in result:
+            print("发现问题图片,需要删除以过审:",image_path)
+            os.remove(image_path)
+        else:
+            print(image_path, "图片没有问题")