Spaces:
Sleeping
Sleeping
rogerxavier
committed on
Create 0filterImage.py
Browse files- 0filterImage.py +53 -0
0filterImage.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#通过modelscope接口对问题图片予以删除,保证过审
|
2 |
+
|
3 |
+
import base64
|
4 |
+
import json
|
5 |
+
import os
|
6 |
+
from io import BytesIO
|
7 |
+
import pandas as pd
|
8 |
+
from PIL import Image
|
9 |
+
|
10 |
+
import requests
|
11 |
+
|
12 |
+
|
13 |
+
def ocr(image, timeout=60):
    """Send an image to the ModelScope OFA OCR demo endpoint and return its reply.

    The image is opened with Pillow, re-encoded in its own format into an
    in-memory buffer, base64-encoded, and POSTed to the endpoint as a JSON
    payload.

    Args:
        image: Whatever ``PIL.Image.open`` accepts (the caller in this file
            passes a file path).
        timeout: Seconds to wait on the HTTP request before ``requests``
            gives up. Without a timeout a stalled connection would block
            the script indefinitely.

    Returns:
        The JSON-decoded body of the HTTP response.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # Use a context manager so the image's file handle is released as soon as
    # the bytes have been re-encoded, instead of leaking one handle per call.
    with Image.open(image) as img:
        img_buffer = BytesIO()
        img.save(img_buffer, format=img.format)

    base64_str = base64.b64encode(img_buffer.getvalue()).decode()

    url = "https://www.modelscope.cn/api/v1/studio/damo/ofa_ocr_pipeline/gradio/api/predict/"
    # NOTE(review): the data-URI prefix always says image/jpeg, even when the
    # re-encoded bytes are PNG. Left unchanged because how the endpoint treats
    # other MIME types is not visible here -- confirm before changing.
    payload = json.dumps({
        "data": [f"data:image/jpeg;base64,{base64_str}"],
        "dataType": ["image"]
    })
    headers = {
        'Content-Type': 'application/json'
    }

    response = requests.request(
        "POST", url, headers=headers, data=payload, timeout=timeout
    )
    return json.loads(response.text)
|
33 |
+
|
34 |
+
if __name__ == '__main__':
    # Images live in the 'manga' directory under the current working directory.
    img_path = 'manga'
    subdir_path = os.path.join(os.getcwd(), img_path)

    # Gather every .jpg/.png file in that directory tree (sub-directories
    # included), stored as paths relative to the current working directory.
    image_files = [
        os.path.relpath(os.path.join(folder, name))
        for folder, _subdirs, names in os.walk(subdir_path)
        for name in names
        if name.endswith((".jpg", ".png"))
    ]

    for image_path in image_files:
        # `result` is whatever ocr() returns: the JSON-decoded service reply.
        result = ocr(image_path)

        if 'error' not in result:
            print(image_path, "图片没有问题")
            continue

        # A reply containing 'error' marks the image as problematic: report it
        # and delete the file from disk.
        print("发现问题图片,需要删除以过审:", image_path)
        os.remove(image_path)
|
52 |
+
|
53 |
+
|