rogerxavier committed on
Commit
e8bc308
1 Parent(s): 4e0f6d1

Update 0filterImage.py

Browse files
Files changed (1) hide show
  1. 0filterImage.py +13 -23
0filterImage.py CHANGED
@@ -7,32 +7,22 @@ import os
7
  from io import BytesIO
8
  import pandas as pd
9
  from PIL import Image
10
-
11
  import requests
 
12
 
13
 
14
def ocr(image):
    """Send an image to the ModelScope OFA OCR endpoint and return its reply.

    The image is opened with Pillow, re-encoded in its own format into an
    in-memory buffer, base64-encoded, and POSTed to the endpoint as a data URI
    inside a JSON payload.

    Args:
        image: Whatever ``PIL.Image.open`` accepts (e.g. a file path).

    Returns:
        The JSON-decoded body of the HTTP response.
    """
    # Use a context manager so the file handle Pillow opens is released as
    # soon as the image has been re-encoded (the original never closed it).
    with Image.open(image) as picture:
        img_buffer = BytesIO()
        picture.save(img_buffer, format=picture.format)
    base64_str = base64.b64encode(img_buffer.getvalue()).decode()

    url = "https://www.modelscope.cn/api/v1/studio/damo/ofa_ocr_pipeline/gradio/api/predict/"
    # NOTE(review): the data URI always declares image/jpeg even though the
    # bytes are saved in the image's own format -- confirm the endpoint
    # ignores the declared MIME type.
    payload = json.dumps({
        "data": [f"data:image/jpeg;base64,{base64_str}"],
        "dataType": ["image"]
    })
    headers = {
        'Content-Type': 'application/json'
    }

    # NOTE(review): no timeout is set, so this call can block indefinitely.
    response = requests.request("POST", url, headers=headers, data=payload)
    return json.loads(response.text)
34
 
35
  if __name__ == '__main__':
 
 
36
  # 获取当前目录的子目录的路径
37
  img_path = 'manga'
38
  subdir_path = os.path.join(os.getcwd(), img_path)
@@ -44,8 +34,8 @@ if __name__ == '__main__':
44
  if file.endswith(".jpg") or file.endswith(".png"):
45
  image_files.append(os.path.relpath(os.path.join(root, file)))
46
  for image_path in image_files:
47
- result = ocr(image_path) ##dataframe格式 有两列 boxid 和text
48
- if 'error' in result:
49
  print("发现问题图片,需要删除以过审:",image_path)
50
  os.remove(image_path)
51
  else:
 
7
  from io import BytesIO
8
  import pandas as pd
9
  from PIL import Image
10
+ from dotenv import load_dotenv
11
  import requests
12
+ from transformers import pipeline
13
 
14
 
15
def get_nsfw_score(image_path: str, model: "object") -> "float | None":
    """Return the score the classifier assigns to the ``'nsfw'`` label.

    Args:
        image_path: Path of the image file to classify.
        model: Callable invoked as ``model(images=img)``; it must return an
            iterable of dicts that each carry ``'label'`` and ``'score'`` keys.

    Returns:
        The ``'score'`` of the first result whose ``'label'`` is ``'nsfw'``,
        or ``None`` when no such label is present in the results.
    """
    # Pillow opens files lazily and keeps the handle open; run the model
    # inside the ``with`` block so the pixel data is still readable, then let
    # the context manager close the file instead of leaking one per image.
    with Image.open(image_path) as img:
        predictions = model(images=img)

    for prediction in predictions:
        if prediction['label'] == 'nsfw':
            return prediction['score']
    return None
21
+
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  if __name__ == '__main__':
24
+ load_dotenv()
25
+ model = pipeline("image-classification", model="Falconsai/nsfw_image_detection")#加载模型
26
  # 获取当前目录的子目录的路径
27
  img_path = 'manga'
28
  subdir_path = os.path.join(os.getcwd(), img_path)
 
34
  if file.endswith(".jpg") or file.endswith(".png"):
35
  image_files.append(os.path.relpath(os.path.join(root, file)))
36
  for image_path in image_files:
37
+ result = get_nsfw_score(image_path)#返回float的得分
38
+ if result> 0.5:
39
  print("发现问题图片,需要删除以过审:",image_path)
40
  os.remove(image_path)
41
  else: