rogerxavier committed on
Commit
20d0048
1 Parent(s): 1e2d513

Create 2magiDialogCut.py

Browse files
Files changed (1) hide show
  1. 2magiDialogCut.py +122 -0
2magiDialogCut.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import requests
3
+ import json
4
+ from PIL import Image
5
+ import os
6
+
7
+
8
def getImgCoordinatesByMagi(image_path: str) -> "list | None":
    """Upload an image to the Magi FastAPI Space and return its text boxes.

    The image file is read into memory and POSTed as the multipart field
    ``image``; the value stored under the ``"texts"`` key of the JSON reply
    is returned.

    Args:
        image_path: Path of the image file, including its extension.

    Returns:
        The ``"texts"`` entry of the response (the caller uses each item as
        a crop box), or ``None`` when the request fails or the reply is not
        JSON containing a ``"texts"`` key.

    Raises:
        OSError: If ``image_path`` cannot be opened or read.
    """
    # Browser-like headers. Deliberately NOT included:
    #   * 'authority' / 'method' / 'scheme': HTTP/2 pseudo-headers copied from
    #     browser dev-tools; sent as ordinary headers they are meaningless
    #     (and 'method: GET' contradicted the POST below).
    #   * 'Accept-Encoding': requests advertises exactly the encodings it can
    #     decode; forcing 'br'/'zstd' can yield an undecodable body when the
    #     optional decoders are not installed.
    #   * a hard-coded 'Cookie': credentials must not live in source control.
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Dnt': '1',
        'Priority': 'u=1, i',
        'Sec-Ch-Ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
        'Sec-Ch-Ua-Mobile': '?0',
        'Sec-Ch-Ua-Platform': '"Windows"',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
    }
    # Optional session token for a non-public Space, supplied via the
    # environment instead of being committed with the code.
    spaces_jwt = os.environ.get('MAGI_SPACES_JWT')
    if spaces_jwt:
        headers['Cookie'] = f'spaces-jwt={spaces_jwt}'

    with open(image_path, 'rb') as file:
        image_bytes = file.read()
    files = {
        "image": image_bytes,
    }
    magi_fastapi_base = 'https://rogerxavier-fastapi-t5-magi.hf.space/getCoordinates'
    try:
        # (connect, read) timeouts: without them a stalled connection would
        # block the whole batch forever.
        resp = requests.post(
            magi_fastapi_base,
            files=files,
            headers=headers,
            timeout=(10, 120),
        )
        payload = resp.json()  # parse once, reuse for logging and the result
        print(payload)
        return payload["texts"]
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # Network failure, non-JSON body, or JSON without a "texts" entry.
        print(e)
        return None
40
+
41
+
42
def path_to_format(old_path: str) -> str:
    """Zero-pad the number in a file name to at least three digits.

    Example: ``/path/to/your/image/1.jpg`` -> ``/path/to/your/image/001.jpg``.

    Only the file name is touched; digits in the directory part and in the
    extension are left alone. When the name holds several separate digit
    groups, the last group is the one that gets padded. A name without any
    digit is returned as is instead of raising ``ValueError``.

    Args:
        old_path: File path including the extension.

    Returns:
        The path with the number formatted as ``{:03d}``.
    """
    import re  # local import: keeps this block self-contained

    # Split into directory, bare name and extension.
    file_path, file_name = os.path.split(old_path)
    stem, file_extension = os.path.splitext(file_name)

    # Locate the contiguous digit runs; the last one is the page number.
    digit_runs = list(re.finditer(r"\d+", stem))
    if digit_runs:
        last_run = digit_runs[-1]
        formatted_number = "{:03d}".format(int(last_run.group()))
        # Replace by position so only that one run is rewritten.
        stem = stem[:last_run.start()] + formatted_number + stem[last_run.end():]

    return os.path.join(file_path, f"{stem}{file_extension}")
55
+
56
+
57
def save_img(new_save_path: str, old_img_path: str) -> None:
    """Crop every detected text box out of an image and save each crop.

    The boxes come from ``getImgCoordinatesByMagi(old_img_path)``. Crop number
    ``i`` is written next to ``new_save_path`` as
    ``<new_save_path without extension>_<i:03d>_cropped.jpg``, with ``i``
    starting at 0. When the lookup returns ``None`` the source image is
    deleted instead.

    Args:
        new_save_path: Destination path (including extension) used as the
            naming base for the crops.
        old_img_path: Path (including extension) of the source image; needed
            both for cropping and for deleting a problematic source file.
    """
    print(new_save_path)

    # Fetch the boxes BEFORE opening the image, so the file is not held open
    # if it has to be deleted.
    text_bboxes_for_all_images = getImgCoordinatesByMagi(old_img_path)
    if text_bboxes_for_all_images is None:
        # NOTE(review): a transient network error also ends up here and
        # removes the source image - confirm this is intended.
        print("图片识别有问题,准备删除")
        os.remove(old_img_path)
        return

    save_prefix = os.path.splitext(new_save_path)[0]
    # Modes Pillow's JPEG writer accepts; anything else (e.g. RGBA or
    # palette PNGs) would make save() raise "cannot write mode ... as JPEG".
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    with Image.open(old_img_path) as img:
        for index, box in enumerate(text_bboxes_for_all_images):
            cropped_img = img.crop(tuple(box))
            if cropped_img.mode not in jpeg_modes:
                cropped_img = cropped_img.convert("RGB")
            # Crops are numbered with three digits so they sort in order.
            cropped_img.save(f"{save_prefix}_{index:03d}_cropped.jpg")
88
+
89
+
90
if __name__ == '__main__':
    # Directory (relative to the current working directory) that holds the
    # watermark-free manga pages to crop.
    img_path = 'manga1'
    subdir_path = os.path.join(os.getcwd(), img_path)

    # Collect every .jpg/.png below that directory, sub-directories included,
    # as paths relative to the current working directory.
    image_files = []
    for root, _dirs, files in os.walk(subdir_path):
        for file in files:
            if file.endswith((".jpg", ".png")):
                image_files.append(os.path.relpath(os.path.join(root, file)))

    # Output directory, created as a sibling of the input directory; the
    # trailing "2" marks the result of processing stage 2.
    processed_subdir_path = os.path.join(os.path.dirname(subdir_path), f"{img_path}2")
    os.makedirs(processed_subdir_path, exist_ok=True)

    for img_file in image_files:
        # Mirror the source's relative directory below the output directory.
        img_dir = os.path.dirname(img_file)
        new_img_dir = os.path.join(processed_subdir_path, img_dir)
        os.makedirs(new_img_dir, exist_ok=True)

        new_img_path = os.path.join(new_img_dir, os.path.basename(img_file))
        new_img_path = path_to_format(new_img_path)

        # save_img numbers its crops from 000, so the first crop is the
        # marker for "already processed". (Checking 001 would re-process
        # every page that produced exactly one crop.)
        first_crop_path = f"{os.path.splitext(new_img_path)[0]}_{0:03d}_cropped.jpg"
        if not os.path.exists(first_crop_path):
            save_img(new_save_path=new_img_path, old_img_path=img_file)
        else:
            print(f"Skipping {new_img_path} as it already exists.")