izuemon committed on
Commit
d385cc0
·
verified ·
1 Parent(s): 4367451

Create mmng.py

Browse files
Files changed (1) hide show
  1. mmng.py +282 -0
mmng.py ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import time
4
+ import json
5
+ import requests
6
+ import img2pdf
7
+ import pdfkit
8
+ from bs4 import BeautifulSoup
9
+ from datetime import datetime, timezone
10
+ from io import BytesIO
11
+
12
# ===== Channel.io settings =====
# Identifier of the Channel.io group whose messages are read and written.
GROUP_ID = "551316"

# One endpoint is used both for listing messages (GET) and posting them (POST).
GET_URL = "https://desk-api.channel.io/desk/channels/200605/groups/" + GROUP_ID + "/messages"
POST_URL = GET_URL

# Query parameters sent with every listing request.
PARAMS = dict(sortOrder="desc", limit=36, logFolded="false")

# Account token sent in the ``x-account`` header; it must be supplied through
# the ``dmsendertoken`` environment variable or the module refuses to load.
X_ACCOUNT = os.environ.get("dmsendertoken")
if not X_ACCOUNT:
    raise RuntimeError("環境変数 dmsendertoken が設定されていません")

# Headers for read requests.
HEADERS_GET = {
    "accept": "application/json",
    "accept-language": "ja",
    "x-account": X_ACCOUNT,
}

# Headers for write requests: same as above plus a JSON content type.
HEADERS_POST = {
    "accept": "application/json",
    "accept-language": "ja",
    "content-type": "application/json",
    "x-account": X_ACCOUNT,
}
40
+
41
+ # ===== Utils =====
42
+
43
def parse_updated_at(value):
    """Convert a message timestamp into a timezone-aware ``datetime``.

    Args:
        value: Either a number, interpreted as milliseconds since the Unix
            epoch, or an ISO-8601 string (a trailing ``Z`` is accepted).

    Returns:
        An aware ``datetime``, or ``None`` when ``value`` has another type or
        cannot be converted. Strings that carry no UTC offset are assumed to
        be UTC so that every result can be compared with every other one.
    """
    try:
        if isinstance(value, (int, float)):
            return datetime.fromtimestamp(value / 1000, tz=timezone.utc)
        if isinstance(value, str):
            # ``fromisoformat`` only understands a literal "Z" suffix on
            # recent Python versions, so spell the offset out explicitly.
            parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
            if parsed.tzinfo is None:
                # A naive value would raise TypeError when compared with the
                # aware values produced by the other branches.
                parsed = parsed.replace(tzinfo=timezone.utc)
            return parsed
    except (ValueError, OverflowError, OSError):
        # Malformed string or out-of-range number: treat as "no timestamp"
        # instead of aborting the caller's whole polling pass.
        return None
    return None
49
+
50
+
51
def extract_url(text):
    """Return the first http(s) URL found in ``text``.

    Args:
        text: Message text to search. Non-string values are accepted and
            yield ``None``.

    Returns:
        The first URL as a string, with trailing sentence punctuation
        (ASCII ``. , ; : ! ?`` and quotes, Japanese ``、`` and ``。``)
        removed, or ``None`` when the text contains no URL.
    """
    if not isinstance(text, str):
        return None

    m = re.search(r"https?://[^\s]+", text)
    if not m:
        return None

    # The pattern runs up to the next whitespace, so punctuation that merely
    # ends the sentence ("... https://example.com/page.") is swallowed too.
    url = m.group(0).rstrip(".,;:!?'\"、。")

    # Nothing but the scheme is left, e.g. for the text "https://...".
    if url.endswith("://"):
        return None
    return url
57
+
58
+
59
def is_mmnga_magazine(url):
    """Tell whether ``url`` points at a momon-ga.com magazine page.

    The host must be ``momon-ga.com`` or one of its subdomains and the path
    must start with ``/magazine/``. Looking at the parsed components, rather
    than searching the raw string, keeps look-alike hosts and URLs that only
    mention the site in their query string from being treated as a match.

    Args:
        url: URL to classify.

    Returns:
        ``True`` for a magazine page, ``False`` otherwise (including for
        values that are not strings or cannot be parsed as a URL).
    """
    from urllib.parse import urlsplit

    if not isinstance(url, str):
        return False

    try:
        parts = urlsplit(url)
        host = parts.hostname or ""
    except ValueError:
        # urlsplit rejects some malformed inputs, e.g. a broken IPv6 host.
        return False

    on_site = host == "momon-ga.com" or host.endswith(".momon-ga.com")
    return on_site and parts.path.startswith("/magazine/")
61
+
62
+
63
+ # ===== PDF作成 =====
64
+
65
def create_pdf_from_images(image_urls):
    """Download every image and bundle them into one PDF.

    Args:
        image_urls: Iterable of image URLs; the images are fetched and
            handed to ``img2pdf`` in the order given.

    Returns:
        Whatever ``img2pdf.convert`` returns for the downloaded images.

    Raises:
        requests.HTTPError: if any download answers with an error status.
    """

    def _download(image_url):
        # Fetch one image and fail loudly on a non-success status.
        response = requests.get(image_url, timeout=60)
        response.raise_for_status()
        return response.content

    pages = [_download(image_url) for image_url in image_urls]
    return img2pdf.convert(pages)
77
+
78
+
79
def create_fullpage_pdf(url):
    """Render the web page at ``url`` to a PDF with pdfkit.

    ``False`` is passed in place of an output path so that pdfkit hands the
    result back to the caller instead of writing it to a file.

    Args:
        url: Address of the page to render.

    Returns:
        The value returned by ``pdfkit.from_url``.
    """
    return pdfkit.from_url(url, False)
84
+
85
+
86
+ # ===== mmnga処理 =====
87
+
88
def get_mmnga_images(url):
    """Collect the image URLs found inside ``<div id="post">`` of a page.

    Args:
        url: Address of the page to scrape.

    Returns:
        A non-empty list of absolute image URLs, in document order.

    Raises:
        requests.HTTPError: if the page request answers with an error status.
        Exception: if the page has no ``div`` with id ``post``, or if that
            element yields no usable image.
    """
    from urllib.parse import urljoin

    r = requests.get(url, timeout=60)
    r.raise_for_status()

    soup = BeautifulSoup(r.text, "html.parser")

    post = soup.find("div", id="post")
    if not post:
        raise Exception("post divが見つかりません")

    image_urls = []
    for img in post.find_all("img"):
        src = (img.get("src") or "").strip()
        if not src:
            continue

        # Inline data URIs cannot be downloaded over HTTP later on.
        if src.lower().startswith("data:"):
            continue

        if src.startswith("//"):
            # Protocol-relative URL.
            src = "https:" + src
        elif src.startswith("/"):
            # Site-absolute path.
            src = "https://momon-ga.com" + src
        elif not src.startswith(("http://", "https://")):
            # Path relative to the page itself (e.g. "images/001.jpg").
            src = urljoin(url, src)

        image_urls.append(src)

    if not image_urls:
        raise Exception("画像が見つかりません")

    return image_urls
121
+
122
+
123
+ # ===== Channel送信 =====
124
+
125
def send_to_channel(text):
    """Post ``text`` to the Channel.io group as a single text block.

    Args:
        text: Message body to send.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    # The request id is derived from the current time in milliseconds.
    request_id = f"desk-web-{int(time.time()*1000)}"
    body = json.dumps({
        "requestId": request_id,
        "blocks": [{"type": "text", "value": text}],
    })

    response = requests.post(POST_URL, headers=HEADERS_POST, data=body, timeout=30)
    response.raise_for_status()
142
+
143
+
144
def upload_file_to_channel(file_bytes):
    """Upload PDF bytes to the Channel.io media endpoint of the group.

    Args:
        file_bytes: Raw PDF content; sent as the request body with an
            ``application/pdf`` content type.

    Returns:
        The decoded JSON body of the upload response.

    Raises:
        requests.HTTPError: if the upload answers with an error status.
    """
    endpoint = (
        "https://media.channel.io/cht/v1/pri-file/200605/groups/"
        f"{GROUP_ID}/message/send_pdf_file.pdf"
    )

    response = requests.post(
        endpoint,
        headers={
            "x-account": X_ACCOUNT,
            "Content-Type": "application/pdf",
            "Content-Length": str(len(file_bytes)),
        },
        data=file_bytes,
        timeout=300,
    )
    response.raise_for_status()
    return response.json()
164
+
165
+
166
def send_pdf_message(file_json):
    """Post a message that attaches a previously uploaded file.

    Args:
        file_json: File descriptor to attach; it is placed unchanged in the
            ``files`` list of the message payload.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    # The request id is derived from the current time in milliseconds.
    request_id = f"desk-web-{int(time.time()*1000)}"
    body = json.dumps({
        "requestId": request_id,
        "blocks": [{"type": "text", "value": "PDFプレビュー"}],
        "files": [file_json],
    })

    response = requests.post(POST_URL, headers=HEADERS_POST, data=body, timeout=30)
    response.raise_for_status()
184
+
185
+
186
+ # ===== Main =====
187
+
188
def main():
    """Poll the Channel.io group forever and turn posted URLs into PDFs.

    Each pass fetches the recent messages, picks the unprocessed message with
    the newest ``updatedAt`` that has non-empty ``plainText``, and:

    * if it contains no URL, marks it processed and waits 10 seconds;
    * otherwise builds a PDF (image bundle for momon-ga magazine pages, a
      rendered page for everything else), uploads it, and posts it, reporting
      progress and errors to the channel.

    Only one message is handled per pass. Failures while polling are printed
    and the loop carries on after a 15 second pause. The function does not
    return.
    """
    # Ids of messages already handled during this run (in memory only).
    processed_messages = set()

    while True:
        try:
            res = requests.get(
                GET_URL,
                headers=HEADERS_GET,
                params=PARAMS,
                timeout=30,
            )
            res.raise_for_status()

            # Tolerate an explicit null as well as a missing key.
            messages = res.json().get("messages") or []

            latest_msg = None
            latest_time = None

            for msg in messages:
                if not msg.get("plainText"):
                    continue
                if msg.get("id") in processed_messages:
                    continue

                t = parse_updated_at(msg.get("updatedAt"))
                if not t:
                    continue

                if latest_time is None or t > latest_time:
                    latest_time = t
                    latest_msg = msg

            if not latest_msg:
                time.sleep(10)
                continue

            msg_id = latest_msg.get("id")
            url = extract_url(latest_msg["plainText"])

            if not url:
                processed_messages.add(msg_id)
                time.sleep(10)
                continue

            send_to_channel("PDF生成を開始します")

            # From here on the job is attempted exactly once: mark the
            # message before running it so that a failure while reporting an
            # error cannot make the same job run again on every pass.
            processed_messages.add(msg_id)

            try:
                if is_mmnga_magazine(url):
                    send_to_channel("mmnga漫画ページを検出しました")
                    image_urls = get_mmnga_images(url)
                    pdf_bytes = create_pdf_from_images(image_urls)
                else:
                    send_to_channel("通常ページをPDF化します")
                    pdf_bytes = create_fullpage_pdf(url)

                send_to_channel("PDFをアップロードしています")
                file_json = upload_file_to_channel(pdf_bytes)
                send_pdf_message(file_json)
                send_to_channel("完了しました")

            except Exception as e:
                # Exception texts often quote the failing URL. Posting it
                # verbatim would put a fresh URL-bearing message in the
                # channel, which the next pass would pick up as a new
                # request. Defang the scheme separator so it stays readable
                # but is no longer matched by extract_url.
                detail = str(e).replace("://", "[:]//")
                send_to_channel(f"エラー: {detail}")

        except Exception as e:
            # Top-level boundary of the polling loop: report and keep going.
            print("error:", e)

        time.sleep(15)


if __name__ == "__main__":
    main()