seawolf2357 committed on
Commit
4a98e80
•
1 Parent(s): b183030

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -131
app.py CHANGED
@@ -43,110 +43,70 @@ MAX_RETRIES = 3
43
  class MyClient(discord.Client):
44
  def __init__(self, *args, **kwargs):
45
  super().__init__(*args, **kwargs)
46
- self.is_processing = False
47
  self.session = None
 
48
 
49
  async def on_ready(self):
50
  logging.info(f'{self.user}둜 λ‘œκ·ΈμΈλ˜μ—ˆμŠ΅λ‹ˆλ‹€!')
51
-
52
- # web.py 파일 μ‹€ν–‰
53
  subprocess.Popen(["python", "web.py"])
54
  logging.info("Web.py μ„œλ²„κ°€ μ‹œμž‘λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
55
-
56
- # aiohttp ν΄λΌμ΄μ–ΈνŠΈ μ„Έμ…˜ 생성
57
  self.session = aiohttp.ClientSession()
58
-
59
- # 봇이 μ‹œμž‘λ  λ•Œ μ•ˆλ‚΄ λ©”μ‹œμ§€λ₯Ό 전솑
60
- channel = self.get_channel(SPECIFIC_CHANNEL_ID)
61
- if channel:
62
- await channel.send("유튜브 λΉ„λ””μ˜€ URL을 μž…λ ₯ν•˜λ©΄, μžλ§‰κ³Ό λŒ“κΈ€μ„ 기반으둜 닡글을 μž‘μ„±ν•©λ‹ˆλ‹€.")
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  async def on_message(self, message):
65
- if message.author == self.user:
66
- return
67
- if not self.is_message_in_specific_channel(message):
68
- return
69
- if self.is_processing:
70
- return
71
- self.is_processing = True
72
- try:
73
- video_id = extract_video_id(message.content)
74
- if video_id:
75
- transcript = await get_best_available_transcript(video_id)
76
- comments = await get_video_comments(video_id)
77
- if comments and transcript:
78
- replies = await generate_replies(comments, transcript)
79
- await create_thread_and_send_replies(message, video_id, comments, replies, self.session)
80
- else:
81
- await message.channel.send("μžλ§‰μ΄λ‚˜ λŒ“κΈ€μ„ κ°€μ Έμ˜¬ 수 μ—†μŠ΅λ‹ˆλ‹€.")
82
- else:
83
- await message.channel.send("μœ νš¨ν•œ 유튜브 λΉ„λ””μ˜€ URL을 μ œκ³΅ν•΄ μ£Όμ„Έμš”.")
84
- finally:
85
- self.is_processing = False
86
-
87
- def is_message_in_specific_channel(self, message):
88
- return message.channel.id == SPECIFIC_CHANNEL_ID or (
89
- isinstance(message.channel, discord.Thread) and message.channel.parent_id == SPECIFIC_CHANNEL_ID
90
- )
91
 
92
  async def close(self):
93
- # aiohttp ν΄λΌμ΄μ–ΈνŠΈ μ„Έμ…˜ μ’…λ£Œ
94
  if self.session:
95
  await self.session.close()
96
  await super().close()
97
 
98
  def extract_video_id(url):
99
- video_id = None
100
  youtube_regex = (
101
  r'(https?://)?(www\.)?'
102
  '(youtube|youtu|youtube-nocookie)\.(com|be)/'
103
  '(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})')
104
-
105
  match = re.match(youtube_regex, url)
106
  if match:
107
- video_id = match.group(6)
108
- logging.debug(f'μΆ”μΆœλœ λΉ„λ””μ˜€ ID: {video_id}')
109
- return video_id
110
 
111
  async def get_best_available_transcript(video_id):
112
  try:
113
  transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['ko'])
 
 
114
  except Exception as e:
115
- logging.warning(f'ν•œκ΅­μ–΄ μžλ§‰ κ°€μ Έμ˜€κΈ° 였λ₯˜: {e}')
116
- try:
117
- transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
118
- except Exception as e:
119
- logging.warning(f'μ˜μ–΄ μžλ§‰ κ°€μ Έμ˜€κΈ° 였λ₯˜: {e}')
120
- try:
121
- transcripts = YouTubeTranscriptApi.list_transcripts(video_id)
122
- transcript = transcripts.find_manually_created_transcript().fetch()
123
- except Exception as e:
124
- logging.error(f'λŒ€μ²΄ μžλ§‰ κ°€μ Έμ˜€κΈ° 였λ₯˜: {e}')
125
- return None
126
-
127
- formatter = TextFormatter()
128
- transcript_text = formatter.format_transcript(transcript)
129
- logging.debug(f'κ°€μ Έμ˜¨ μžλ§‰: {transcript_text}')
130
- return transcript_text
131
 
132
  async def get_video_comments(video_id):
133
  comments = []
134
- response = youtube_service.commentThreads().list(
135
- part='snippet',
136
- videoId=video_id,
137
- maxResults=100 # μ΅œλŒ€ 100개의 λŒ“κΈ€ κ°€μ Έμ˜€κΈ°
138
- ).execute()
139
-
140
  for item in response.get('items', []):
141
  comment = item['snippet']['topLevelComment']['snippet']['textOriginal']
142
  comment_id = item['snippet']['topLevelComment']['id']
143
  comments.append((comment, comment_id))
144
-
145
- logging.debug(f'κ°€μ Έμ˜¨ λŒ“κΈ€: {comments}')
146
  return comments
147
 
148
- async def generate_replies(comments, transcript):
149
- replies = []
150
  system_prompt = """
151
  λ„ˆλŠ” 유튜브 λŒ“κΈ€μ— 닡글을 μž‘μ„±ν•˜λŠ” 역할이닀. λ„ˆλŠ” μ•„μ£Ό μΉœμ ˆν•˜κ³  μ‰¬μš΄ λ‚΄μš©μœΌλ‘œ 전문적인 글을 '300 토큰 이내'둜 μž‘μ„±ν•˜μ—¬μ•Ό ν•œλ‹€.
152
  μ˜μƒμ—μ„œ μΆ”μΆœν•œ 'μžλ§‰'을 기반으둜 μ˜μƒ λ‚΄μš©μ— κΈ°λ°˜ν•œ 닡글을 μž‘μ„±ν•˜λΌ.
@@ -155,74 +115,32 @@ async def generate_replies(comments, transcript):
155
  λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ λ‹΅λ³€ν•˜μ‹­μ‹œμ˜€.
156
  μž‘μ„±λœ κΈ€μ˜ λ§ˆμ§€λ§‰μ— λ°˜λ“œμ‹œ 인삿말과 OpenFreeAI 라고 μžμ‹ μ„ λ°ν˜€λΌ.
157
  """
158
- for comment, _ in comments:
159
- messages = [
160
- {"role": "system", "content": system_prompt},
161
- {"role": "user", "content": comment},
162
- {"role": "system", "content": f"λΉ„λ””μ˜€ μžλ§‰: {transcript}"}
163
- ]
164
- loop = asyncio.get_event_loop()
165
- response = await loop.run_in_executor(None, lambda: hf_client.chat_completion(
166
- messages, max_tokens=250, temperature=0.7, top_p=0.85))
167
-
168
- if response.choices and response.choices[0].message:
169
- reply = response.choices[0].message['content'].strip()
170
- else:
171
- reply = "닡글을 생성할 수 μ—†μŠ΅λ‹ˆλ‹€."
172
- replies.append(reply)
173
-
174
- logging.debug(f'μƒμ„±λœ λ‹΅κΈ€: {replies}')
175
- return replies
176
-
177
-
178
-
179
- async def send_webhook_data(session, chunk_data, chunk_number):
180
- max_retries = 3 # μ΅œλŒ€ μž¬μ‹œλ„ 횟수
181
- retry_delay = 1 # μž¬μ‹œλ„ μ‚¬μ΄μ˜ λŒ€κΈ° μ‹œκ°„ (초)
182
-
183
- for attempt in range(max_retries):
184
  try:
185
- async with session.post(WEBHOOK_URL, json=chunk_data) as response:
186
  if response.status == 200:
187
- logging.info(f"μ›Ήν›…μœΌλ‘œ 데이터 전솑 성곡: 청크 {chunk_number}, μ‹œλ„ {attempt+1}")
188
- return True # 성곡 μ‹œ μ’…λ£Œ
189
  else:
190
- logging.error(f"μ›Ήν›…μœΌλ‘œ 데이터 전솑 μ‹€νŒ¨: HTTP {response.status}, 청크 {chunk_number}, μ‹œλ„ {attempt+1}")
191
  except aiohttp.ClientError as e:
192
- logging.error(f"μ›Ήν›… 전솑 쀑 HTTP 였λ₯˜ λ°œμƒ: {e}, 청크 {chunk_number}, μ‹œλ„ {attempt+1}")
193
- except Exception as e:
194
- logging.error(f"μ›Ήν›… 전솑 쀑 μ•Œ 수 μ—†λŠ” 였λ₯˜ λ°œμƒ: {e}, 청크 {chunk_number}, μ‹œλ„ {attempt+1}")
195
-
196
- await asyncio.sleep(retry_delay) # μž¬μ‹œλ„ 전에 1초 λŒ€κΈ°
197
-
198
- logging.error(f"μ›Ήν›… 데이터 전솑 μ‹€νŒ¨, λͺ¨λ“  μž¬μ‹œλ„ μ†Œμ§„: 청크 {chunk_number}")
199
- return False # μž¬μ‹œλ„ 횟수 초과 μ‹œ μ‹€νŒ¨λ‘œ κ°„μ£Ό
200
-
201
-
202
-
203
- async def create_thread_and_send_replies(message, video_id, comments, replies, session):
204
- thread = await message.channel.create_thread(name=f"{message.author.name}의 λŒ“κΈ€ λ‹΅κΈ€", message=message)
205
- webhook_data = {"video_id": video_id, "replies": []}
206
-
207
- for (comment, comment_id), reply in zip(comments, replies):
208
- embed = discord.Embed(description=f"**λŒ“κΈ€**: {comment}\n**λ‹΅κΈ€**: {reply}")
209
- await thread.send(embed=embed)
210
-
211
- # μ›Ήν›… 데이터 μ€€λΉ„ (comment id 포함)
212
- webhook_data["replies"].append({"comment": comment, "reply": reply, "comment_id": comment_id})
213
-
214
- # 데이터λ₯Ό μ—¬λŸ¬ 번 λ‚˜λˆ„μ–΄ 전솑
215
- chunk_size = 1 # 전솑할 λ°μ΄ν„°μ˜ 개수λ₯Ό 1둜 μ„€μ •ν•˜μ—¬ 각 데이터λ₯Ό λ³„λ„λ‘œ 전솑
216
- for i in range(0, len(webhook_data["replies"]), chunk_size):
217
- chunk = webhook_data["replies"][i:i+chunk_size]
218
- chunk_data = {"video_id": video_id, "replies": chunk}
219
-
220
- success = await send_webhook_data(session, chunk_data, i // chunk_size + 1)
221
- if not success:
222
- logging.error(f"데이터 전솑 μ‹€νŒ¨: {i // chunk_size + 1} 번째 청크")
223
 
224
  if __name__ == "__main__":
225
  discord_client = MyClient(intents=intents)
226
  discord_client.run(os.getenv('DISCORD_TOKEN'))
227
-
228
-
 
class MyClient(discord.Client):
    """Discord client that polls one channel for YouTube links and answers new comments.

    After login it launches the companion ``web.py`` process, opens a shared
    aiohttp session and starts a background task. That task repeatedly scans
    the most recent messages of ``SPECIFIC_CHANNEL_ID`` for YouTube URLs and,
    for every video comment it has not seen before, generates a reply and
    hands it to ``create_thread_and_send_reply``.
    """

    # Number of most recent channel messages scanned for YouTube links per pass.
    HISTORY_LIMIT = 10
    # Seconds to wait between two polling passes.
    POLL_INTERVAL_SECONDS = 5

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Shared aiohttp session; created in on_ready, closed in close().
        self.session = None
        # video_id -> list of (comment_text, comment_id) tuples already handled.
        self.last_comments = {}
        # Background polling task; None until the first on_ready.
        self._poll_task = None

    async def on_ready(self):
        """Start the web server, the HTTP session and the polling task once."""
        logging.info(f'{self.user}로 로그인되었습니다!')
        if self._poll_task is not None:
            # discord.py may dispatch on_ready again after a reconnect. Starting
            # everything a second time would spawn another web.py process, leak
            # the previous session and run two pollers that reply twice.
            return
        subprocess.Popen(["python", "web.py"])
        logging.info("Web.py 서버가 시작되었습니다.")
        self.session = aiohttp.ClientSession()
        self._poll_task = self.loop.create_task(self.check_for_new_comments())

    async def check_for_new_comments(self):
        """Poll the watched channel forever, sleeping between passes.

        A failure in one pass is logged and the loop continues; without this a
        single API error would silently end the background task for good.
        Cancellation is not an ``Exception`` and still stops the loop.
        """
        while True:
            try:
                await self._poll_channel_once()
            except Exception:
                logging.exception("Polling for new YouTube comments failed; retrying on the next pass.")
            await asyncio.sleep(self.POLL_INTERVAL_SECONDS)

    async def _poll_channel_once(self):
        """Scan the latest channel messages and process every YouTube link found."""
        channel = self.get_channel(SPECIFIC_CHANNEL_ID)
        if not channel:
            return
        async for message in channel.history(limit=self.HISTORY_LIMIT):
            video_id = extract_video_id(message.content)
            if video_id:
                await self._reply_to_new_comments(message, video_id)

    async def _reply_to_new_comments(self, message, video_id):
        """Generate and publish replies for comments of *video_id* not seen before."""
        current = await get_video_comments(video_id)
        handled = self.last_comments.setdefault(video_id, [])
        known = set(handled)  # O(1) membership instead of scanning the list per comment
        fresh = [comment for comment in current if comment not in known]
        if fresh:
            # The transcript is the same for every comment of a video: fetch it once.
            transcript = await get_best_available_transcript(video_id)
            for comment in fresh:
                reply = await generate_reply(comment[0], transcript)
                await create_thread_and_send_reply(message, video_id, comment, reply, self.session)
                # Remember immediately so a later failure does not cause this
                # comment to be answered again on the next pass.
                handled.append(comment)
        self.last_comments[video_id] = current

    async def on_message(self, message):
        """Ignore incoming messages; all work happens in the polling task."""
        pass

    async def close(self):
        """Stop the polling task, close the aiohttp session, then close the client."""
        if self._poll_task is not None:
            self._poll_task.cancel()
        if self.session:
            await self.session.close()
        await super().close()
80
 
# Every fragment is a raw string: the original mixed raw and plain literals, so
# "\." and "\?" in the plain ones were invalid escape sequences (a SyntaxWarning
# on recent Python versions).
_YOUTUBE_URL_RE = re.compile(
    r'(https?://)?(www\.)?'
    r'(youtube|youtu|youtube-nocookie)\.(com|be)/'
    r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
)


def extract_video_id(url):
    """Return the 11-character YouTube video ID at the start of *url*.

    Args:
        url: Text expected to begin with a YouTube URL (scheme and ``www.``
            are optional).

    Returns:
        The video ID, or ``None`` when *url* is empty/``None`` or does not
        start with a recognised YouTube URL.
    """
    if not url:
        return None
    match = _YOUTUBE_URL_RE.match(url)
    if match:
        return match.group(6)
    return None
 
90
 
async def get_best_available_transcript(video_id):
    """Fetch the Korean transcript of a video as plain text.

    The transcript download is a blocking HTTP call, so it runs in the default
    executor; calling it directly inside the coroutine would freeze the Discord
    event loop for the duration of the request.

    Args:
        video_id: YouTube video ID.

    Returns:
        The transcript formatted by ``TextFormatter``, or ``None`` when it
        could not be retrieved (the error is logged).
    """
    loop = asyncio.get_running_loop()
    try:
        transcript = await loop.run_in_executor(
            None,
            lambda: YouTubeTranscriptApi.get_transcript(video_id, languages=['ko']),
        )
        formatter = TextFormatter()
        return formatter.format_transcript(transcript)
    except Exception as e:
        # Best-effort boundary: any failure means "no transcript" for the caller.
        logging.error(f"Failed to retrieve transcript: {e}")
        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
async def get_video_comments(video_id):
    """Return up to 100 top-level comments of a video.

    The API request is executed in the default executor because ``execute()``
    is a blocking HTTP call and would otherwise stall the Discord event loop.

    Args:
        video_id: YouTube video ID.

    Returns:
        A list of ``(comment_text, comment_id)`` tuples, in the order returned
        by the API; empty when the response has no ``items``.
    """
    request = youtube_service.commentThreads().list(part='snippet', videoId=video_id, maxResults=100)
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(None, request.execute)
    top_level_comments = (item['snippet']['topLevelComment'] for item in response.get('items', []))
    return [(entry['snippet']['textOriginal'], entry['id']) for entry in top_level_comments]
108
 
109
+ async def generate_reply(comment, transcript):
 
110
  system_prompt = """
111
  λ„ˆλŠ” 유튜브 λŒ“κΈ€μ— 닡글을 μž‘μ„±ν•˜λŠ” 역할이닀. λ„ˆλŠ” μ•„μ£Ό μΉœμ ˆν•˜κ³  μ‰¬μš΄ λ‚΄μš©μœΌλ‘œ 전문적인 글을 '300 토큰 이내'둜 μž‘μ„±ν•˜μ—¬μ•Ό ν•œλ‹€.
112
  μ˜μƒμ—μ„œ μΆ”μΆœν•œ 'μžλ§‰'을 기반으둜 μ˜μƒ λ‚΄μš©μ— κΈ°λ°˜ν•œ 닡글을 μž‘μ„±ν•˜λΌ.
 
115
  λ°˜λ“œμ‹œ ν•œκΈ€λ‘œ λ‹΅λ³€ν•˜μ‹­μ‹œμ˜€.
116
  μž‘μ„±λœ κΈ€μ˜ λ§ˆμ§€λ§‰μ— λ°˜λ“œμ‹œ 인삿말과 OpenFreeAI 라고 μžμ‹ μ„ λ°ν˜€λΌ.
117
  """
118
+ prompt = f"{system_prompt}\nλŒ“κΈ€: {comment}\nλΉ„λ””μ˜€ μžλ§‰: {transcript}"
119
+ response = await hf_client(text=prompt, max_tokens=150)
120
+ return response['choices'][0]['text']
121
+
async def create_thread_and_send_reply(message, video_id, comment, reply, session):
    """Post one comment/reply pair to the message's thread and forward it to the webhook.

    Args:
        message: Discord message that contained the YouTube link.
        video_id: YouTube video ID the comment belongs to.
        comment: ``(comment_text, comment_id)`` tuple.
        reply: Generated reply text.
        session: aiohttp session used for the webhook request.
    """
    comment_text, comment_id = comment[0], comment[1]

    # A message can own only one thread, and that thread shares the message's
    # ID. Creating it again for the second comment of the same video fails, so
    # reuse the existing thread when there is one.
    thread = message.channel.get_thread(message.id)
    if thread is None:
        try:
            thread = await message.channel.create_thread(
                name=f"{message.author.name}의 댓글 답글", message=message)
        except discord.HTTPException:
            # The thread may exist without being cached (e.g. archived).
            # If this lookup also fails, the error propagates to the caller.
            thread = await message.guild.fetch_channel(message.id)

    description = f"**댓글**: {comment_text}\n**답글**: {reply}"
    # Discord rejects embed descriptions longer than 4096 characters.
    embed = discord.Embed(description=description[:4096])
    await thread.send(embed=embed)

    # The webhook receives the untruncated text.
    webhook_data = {
        "video_id": video_id,
        "replies": [{"comment": comment_text, "reply": reply, "comment_id": comment_id}],
    }
    await send_webhook_data(session, webhook_data)
128
+
def split_webhook_payload(data):
    """Split *data* into one complete payload per entry of ``data["replies"]``.

    Payloads with zero or one reply, or without a ``replies`` list, are
    returned unchanged as a single-element list.
    """
    replies = data.get("replies") if isinstance(data, dict) else None
    if not isinstance(replies, list) or len(replies) <= 1:
        return [data]
    return [{**data, "replies": [reply]} for reply in replies]


async def send_webhook_data(session, data):
    """POST *data* to ``WEBHOOK_URL``, split into one request per reply entry.

    The previous implementation cut the serialized JSON text every 2000
    characters and parsed each slice again; a slice of a JSON document is not
    valid JSON, so any payload above that size raised instead of being sent.
    Splitting happens on the ``replies`` list instead, so every request body
    is a complete document.

    Args:
        session: aiohttp client session used for the requests.
        data: JSON-serializable payload, normally
            ``{"video_id": ..., "replies": [...]}``.

    Returns:
        ``True`` when every request was answered with HTTP 200, else ``False``.
    """
    all_sent = True
    for payload in split_webhook_payload(data):
        try:
            async with session.post(WEBHOOK_URL, json=payload) as response:
                if response.status == 200:
                    logging.info("Webhook data sent successfully.")
                else:
                    all_sent = False
                    logging.error(f"Failed to send webhook data: HTTP {response.status}")
        except aiohttp.ClientError as e:
            all_sent = False
            logging.error(f"HTTP error occurred while sending webhook data: {e}")
    return all_sent
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
if __name__ == "__main__":
    # Fail early with a clear message instead of handing None to discord.py.
    token = os.getenv('DISCORD_TOKEN')
    if not token:
        raise SystemExit("DISCORD_TOKEN environment variable is not set.")
    discord_client = MyClient(intents=intents)
    discord_client.run(token)