moviepy_with_manga_test / 0filterImage.py
rogerxavier's picture
Create 0filterImage.py
8f0c284 verified
raw history blame
No virus
1.62 kB
# Delete problematic images, as flagged via the ModelScope API, so the remaining material passes review.
import base64
import json
import os
from io import BytesIO
import pandas as pd
from PIL import Image
import requests
def ocr(image, *, timeout=60):
    """Send an image to the ModelScope OFA OCR demo endpoint and return its reply.

    The image is opened with Pillow, re-encoded in its own format into an
    in-memory buffer, base64-encoded and posted to the endpoint as a data URI.

    Args:
        image: Anything ``PIL.Image.open`` accepts (a file path or a binary
            file object).
        timeout: Seconds to wait for the HTTP request, forwarded to
            ``requests``; ``None`` waits indefinitely.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    img_buffer = BytesIO()
    # The context manager releases the underlying file handle as soon as the
    # pixels have been re-encoded, so looping over many files does not leak.
    with Image.open(image) as img:
        img_format = img.format
        img.save(img_buffer, format=img_format)

    # Label the data URI with the MIME type of the format actually written
    # (e.g. image/png for PNG files); fall back to JPEG if it is unknown.
    mime_type = Image.MIME.get(img_format, "image/jpeg")
    base64_str = base64.b64encode(img_buffer.getvalue()).decode()

    url = "https://www.modelscope.cn/api/v1/studio/damo/ofa_ocr_pipeline/gradio/api/predict/"
    payload = json.dumps({
        "data": [f"data:{mime_type};base64,{base64_str}"],
        "dataType": ["image"]
    })
    headers = {
        'Content-Type': 'application/json'
    }
    # The HTTP status is deliberately not checked: the body is parsed and
    # returned whatever the status, so callers that look for an 'error'
    # field in the reply still receive it.
    response = requests.request(
        "POST", url, headers=headers, data=payload, timeout=timeout
    )
    return json.loads(response.text)
if __name__ == '__main__':
    # Folder, relative to the current working directory, that holds the images.
    img_path = 'manga'
    subdir_path = os.path.join(os.getcwd(), img_path)

    # Gather every .jpg/.png file under that folder (sub-folders included),
    # stored as paths relative to the current working directory.
    image_files = []
    for folder, _subdirs, names in os.walk(subdir_path):
        image_files.extend(
            os.path.relpath(os.path.join(folder, name))
            for name in names
            if name.endswith((".jpg", ".png"))
        )

    for image_path in image_files:
        # `result` is whatever ocr() returns for this file.
        result = ocr(image_path)
        if 'error' not in result:
            print(image_path, "图片没有问题")
            continue
        # An 'error' entry in the reply is treated as "problematic image":
        # report it and delete the file from disk.
        print("发现问题图片,需要删除以过审:",image_path)
        os.remove(image_path)