seawolf2357's picture
Update app.py
6a13d8f verified
raw
history blame
8.3 kB
import discord
import logging
import os
import re
import asyncio
import json
import subprocess
from huggingface_hub import InferenceClient
from googleapiclient.discovery import build
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
from dotenv import load_dotenv
# Load environment variables (python-dotenv).
load_dotenv()
# Path of the JSON_TOKEN.json file (passed to the OAuth client-secrets flow
# below) and of the cached user token.
credentials_path = 'JSON_TOKEN.json'
token_path = 'token.json'
# Logging setup: DEBUG level, single stream handler.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(name)s:%(message)s', handlers=[logging.StreamHandler()])
# Intent setup: start from the defaults and switch on message/guild intents.
intents = discord.Intents.default()
intents.message_content = True
intents.messages = True
intents.guilds = True
intents.guild_messages = True
# Inference API client setup; the token is read from HF_TOKEN.
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus", token=os.getenv("HF_TOKEN"))
# OAuth 2.0 authentication setup.
SCOPES = ["https://www.googleapis.com/auth/youtube.force-ssl"]
creds = None
# Reuse previously saved user credentials when the token file exists.
if os.path.exists(token_path):
    creds = Credentials.from_authorized_user_file(token_path, SCOPES)
if not creds or not creds.valid:
    if creds and creds.expired and creds.refresh_token:
        # Expired credentials that carry a refresh token are refreshed in place.
        creds.refresh(Request())
    else:
        # Otherwise run the installed-app flow from the client-secrets file.
        flow = InstalledAppFlow.from_client_secrets_file(credentials_path, SCOPES)
        creds = flow.run_local_server(port=0)
    # Persist the credentials so the next start can load them from token_path.
    # NOTE(review): the source lost its indentation; this write is assumed to
    # belong inside the "credentials missing or invalid" branch - confirm.
    with open(token_path, 'w') as token:
        token.write(creds.to_json())
# YouTube Data API v3 service object shared by the functions below.
youtube_service = build('youtube', 'v3', credentials=creds)
# ID of the specific Discord channel the bot works in, read from DISCORD_CHANNEL_ID.
# NOTE(review): os.getenv returns None when the variable is unset, so int()
# raises TypeError at import time in that case.
SPECIFIC_CHANNEL_ID = int(os.getenv("DISCORD_CHANNEL_ID"))
class MyClient(discord.Client):
    """Discord client that answers YouTube links posted in one channel.

    For a message in the configured channel (or one of its threads) the
    client extracts a YouTube video ID, fetches the transcript and comments,
    generates a reply per comment, and hands the results to
    ``create_thread_and_send_replies`` and ``post_replies_to_youtube``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # True while one request is being handled; other messages are ignored.
        self.is_processing = False
        # on_ready can be dispatched more than once (e.g. after a reconnect),
        # so one-time startup work is guarded by this flag.
        self._startup_done = False
        # Handle of the web.py child process, set on first on_ready.
        self.web_process = None

    async def on_ready(self):
        """Log the login and, on the first call only, run startup work.

        Startup work is launching ``web.py`` as a child process and sending
        the usage hint to the configured channel.
        """
        logging.info(f'{self.user}로 로그인되었습니다!')
        if self._startup_done:
            # Avoid spawning a second web.py and re-sending the announcement.
            return
        self._startup_done = True
        # Run the web.py file; keep the handle so the child stays referenced.
        self.web_process = subprocess.Popen(["python", "web.py"])
        logging.info("Web.py server has been started.")
        # Send the guidance message when the bot starts.
        channel = self.get_channel(SPECIFIC_CHANNEL_ID)
        if channel:
            await channel.send("유튜브 비디오 URL을 입력하면, 자막과 댓글을 기반으로 답글을 작성합니다.")

    async def on_message(self, message):
        """Handle one incoming message.

        Ignores the bot's own messages, messages outside the configured
        channel/threads, and anything that arrives while another request is
        still being processed.
        """
        if message.author == self.user:
            return
        if not self.is_message_in_specific_channel(message):
            return
        if self.is_processing:
            return
        self.is_processing = True
        try:
            video_id = extract_video_id(message.content)
            if not video_id:
                await message.channel.send("유효한 유튜브 비디오 URL을 제공해 주세요.")
                return
            transcript = await get_best_available_transcript(video_id)
            comments = await get_video_comments(video_id)
            if not (comments and transcript):
                await message.channel.send("자막이나 댓글을 가져올 수 없습니다.")
                return
            replies = await generate_replies(comments, transcript)
            await create_thread_and_send_replies(message, video_id, comments, replies)
            await post_replies_to_youtube(video_id, comments, replies)
        finally:
            # Always release the flag, even when a step above raised.
            self.is_processing = False

    def is_message_in_specific_channel(self, message):
        """Return True if the message is in the configured channel or one of its threads."""
        return message.channel.id == SPECIFIC_CHANNEL_ID or (
            isinstance(message.channel, discord.Thread) and message.channel.parent_id == SPECIFIC_CHANNEL_ID
        )
# Compiled once at import time. Matches a YouTube host, an optional path
# prefix (watch?...v=, embed/, v/, shorts/, live/), and then exactly 11
# ID characters that are not followed by another ID character.
_YOUTUBE_URL_RE = re.compile(
    r'(?:https?://)?(?:www\.)?'
    r'(?:youtube|youtu|youtube-nocookie)\.(?:com|be)/'
    r'(?:[^\s?#]*\?(?:[^\s#]*?&)?v=|embed/|v/|shorts/|live/)?'
    r'([A-Za-z0-9_-]{11})(?![A-Za-z0-9_-])'
)


def extract_video_id(url):
    """Extract the video ID from a YouTube video URL.

    The URL may appear anywhere in ``url`` (for example inside a chat
    message). Only the characters ``A-Z a-z 0-9 _ -`` are accepted in the
    ID, so path segments such as ``shorts/`` or ``channel/`` are never
    returned as part of it.

    Args:
        url: Text that may contain a YouTube URL.

    Returns:
        The 11-character video ID, or ``None`` when no URL is found.
    """
    match = _YOUTUBE_URL_RE.search(url)
    if match is None:
        return None
    video_id = match.group(1)
    logging.debug(f'Extracted video ID: {video_id}')
    return video_id
def _fetch_transcript_text(video_id):
    """Blocking helper: fetch a transcript and return it as plain text.

    Tries Korean, then English, then the first transcript yielded by the
    video's transcript listing. Returns ``None`` when nothing can be fetched.
    """
    transcript = None
    for code, label in (('ko', 'Korean'), ('en', 'English')):
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[code])
            break
        except Exception as e:
            logging.warning(f'Error fetching {label} transcript: {e}')
    if transcript is None:
        try:
            # Neither Korean nor English exists: take whatever the listing
            # offers first. find_manually_created_transcript() needs a list
            # of language codes, so it cannot be used for "any language".
            available = YouTubeTranscriptApi.list_transcripts(video_id)
            first = next(iter(available), None)
            if first is None:
                raise LookupError('no transcripts listed for this video')
            transcript = first.fetch()
        except Exception as e:
            logging.error(f'Error fetching alternative transcript: {e}')
            return None
    # Format the transcript entries as plain text.
    transcript_text = TextFormatter().format_transcript(transcript)
    logging.debug(f'Fetched transcript: {transcript_text}')
    return transcript_text


async def get_best_available_transcript(video_id):
    """Fetch the transcript of a YouTube video.

    The network calls are blocking, so they run in the default executor,
    the same way ``generate_replies`` runs its inference call. This keeps
    the event loop responsive while the transcript is downloaded.

    Args:
        video_id: YouTube video ID.

    Returns:
        The transcript as plain text, or ``None`` when none is available.
    """
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _fetch_transcript_text, video_id)
async def get_video_comments(video_id):
    """Fetch the top-level comments of a YouTube video.

    Only the first page is requested (up to 100 comment threads). The
    blocking API call runs in the default executor so the event loop is not
    stalled. On an API error the failure is logged and an empty list is
    returned, which the caller already treats as "nothing to reply to".

    Args:
        video_id: YouTube video ID.

    Returns:
        A list of ``(comment_text, comment_id)`` tuples; empty on failure.
    """
    def _list_comment_threads():
        return youtube_service.commentThreads().list(
            part='snippet',
            videoId=video_id,
            maxResults=100  # fetch at most 100 comments
        ).execute()

    loop = asyncio.get_running_loop()
    try:
        response = await loop.run_in_executor(None, _list_comment_threads)
    except Exception as e:
        logging.error(f'Error fetching comments for video {video_id}: {e}')
        return []

    comments = []
    for item in response.get('items', []):
        top_level = item['snippet']['topLevelComment']
        snippet = top_level['snippet']
        # NOTE(review): the API reference says textOriginal may be omitted for
        # callers who are not the comment's author; fall back to textDisplay
        # instead of raising KeyError. Confirm against live responses.
        text = snippet.get('textOriginal') or snippet.get('textDisplay', '')
        # Keep the comment text together with its ID (needed to post a reply).
        comments.append((text, top_level['id']))
    logging.debug(f'Fetched comments: {comments}')
    return comments
async def generate_replies(comments, transcript):
    """Generate one LLM reply per comment, using the transcript as context.

    Each comment is sent as the user message, with the transcript in the
    system message. The blocking inference call runs in the default
    executor. A fixed fallback text is used when the model returns no
    choice or an empty/None message content.

    Args:
        comments: Iterable of ``(comment_text, comment_id)`` tuples.
        transcript: Transcript text of the video.

    Returns:
        A list of reply strings, in the same order as ``comments``.
    """
    fallback_reply = "답글을 생성할 수 없습니다."
    # The loop does not change between comments; look it up once.
    loop = asyncio.get_running_loop()

    def _complete(chat_messages):
        # Blocking call; executed in a worker thread.
        return hf_client.chat_completion(
            chat_messages, max_tokens=400, temperature=0.7, top_p=0.85)

    replies = []
    for comment, _ in comments:
        messages = [
            {"role": "system", "content": f"비디오 자막: {transcript}"},
            {"role": "user", "content": comment}
        ]
        response = await loop.run_in_executor(None, _complete, messages)
        content = None
        if response.choices and response.choices[0].message:
            content = response.choices[0].message['content']
        # content can be None or blank; never call .strip() on None.
        reply = content.strip() if content else ""
        replies.append(reply or fallback_reply)
    logging.debug(f'Generated replies: {replies}')
    return replies
async def create_thread_and_send_replies(message, video_id, comments, replies):
    """Send each comment and its reply as an embed to a Discord thread.

    When the triggering message was posted in a regular channel, a new
    thread is created on that message. When it was posted inside a thread
    (which the client also accepts), that thread is reused, because thread
    channels cannot create nested threads.

    Args:
        message: The Discord message that triggered the request.
        video_id: YouTube video ID (unused here; kept for the call signature).
        comments: Iterable of ``(comment_text, comment_id)`` tuples.
        replies: Reply strings, aligned with ``comments``.
    """
    # Discord rejects embed descriptions longer than this.
    description_limit = 4096

    if isinstance(message.channel, discord.Thread):
        thread = message.channel
    else:
        thread = await message.channel.create_thread(
            name=f"{message.author.name}의 댓글 답글", message=message)

    for (comment, _), reply in zip(comments, replies):
        description = f"**댓글**: {comment}\n**답글**: {reply}"
        if len(description) > description_limit:
            # Truncate rather than let the send fail on a long comment.
            description = description[:description_limit - 3] + "..."
        await thread.send(embed=discord.Embed(description=description))
async def post_replies_to_youtube(video_id, comments, replies):
    """Post the generated replies as YouTube comment replies.

    Each reply is inserted with the original comment's ID as ``parentId``.
    The blocking API call runs in the default executor so the event loop
    stays responsive. A failure for one comment is logged and does not stop
    the remaining replies from being posted.

    Args:
        video_id: YouTube video ID (unused here; kept for the call signature).
        comments: Iterable of ``(comment_text, comment_id)`` tuples.
        replies: Reply strings, aligned with ``comments``.
    """
    loop = asyncio.get_running_loop()
    for (_, comment_id), reply in zip(comments, replies):
        try:
            request = youtube_service.comments().insert(
                part='snippet',
                body={
                    'snippet': {
                        'parentId': comment_id,
                        'textOriginal': reply
                    }
                }
            )
            await loop.run_in_executor(None, request.execute)
            logging.debug(f'Posted reply to comment: {comment_id}')
        except Exception as e:
            logging.error(f'Error posting reply to comment {comment_id}: {e}')
if __name__ == "__main__":
    # Script entry point: build the client with the configured intents and
    # run it with the bot token taken from DISCORD_TOKEN.
    bot_token = os.getenv('DISCORD_TOKEN')
    discord_client = MyClient(intents=intents)
    discord_client.run(bot_token)