File size: 3,319 Bytes
dce378c
 
8b1a899
dce378c
2cfede8
b1aeb47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b1a899
 
dce378c
 
 
8b1a899
 
 
 
 
 
 
2cfede8
dce378c
 
 
b1aeb47
dce378c
 
b1aeb47
dce378c
 
8b1a899
 
dce378c
 
 
 
 
 
 
 
 
 
 
 
161ad03
b1aeb47
2cfede8
 
 
 
 
 
 
 
 
 
b1aeb47
 
 
 
8b1a899
 
dce378c
 
 
 
b1aeb47
 
 
 
 
dce378c
b1aeb47
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from pytube import YouTube
from google.cloud import speech_v1p1beta1 as speech
from pydub import AudioSegment  # μ˜€λ””μ˜€ λ³€ν™˜μ„ μœ„ν•œ 라이브러리
import io
import openai
import os
import gradio as gr

# 유튜브 λΉ„λ””μ˜€ ID μΆ”μΆœ ν•¨μˆ˜
def get_yt_video_id(url):
    """Return the video ID contained in a YouTube URL.

    Recognised forms (with or without a scheme):
    ``youtube.com/watch?v=ID``, ``/embed/ID``, ``/v/ID``, ``/shorts/ID``,
    ``/live/ID`` on any host containing ``youtube``, and ``youtu.be/ID``.

    Args:
        url: The link to inspect.

    Returns:
        The video ID as a non-empty string.

    Raises:
        ValueError: If ``url`` is empty, is not a YouTube link, or is a
            YouTube link that carries no video ID.
    """
    from urllib.parse import urlparse, parse_qs

    # User-facing message: "This is not a valid YouTube link."
    invalid_link_message = "유효한 유튜브 링크가 아닙니다."

    if not isinstance(url, str) or not url.strip():
        raise ValueError(invalid_link_message)
    url = url.strip()

    try:
        parsed = urlparse(url)
        if not parsed.netloc:
            # No scheme given (e.g. "youtu.be/ID", "m.youtube.com/watch?v=ID"):
            # urlparse puts everything in .path, so retry with a scheme.
            parsed = urlparse("http://" + url.lstrip("/"))
    except ValueError:
        # urlparse itself rejects some malformed hosts (e.g. bad IPv6 literal).
        raise ValueError(invalid_link_message) from None

    host = parsed.hostname or ""
    path = parsed.path

    if "youtube" in host:
        if path == "/watch":
            values = parse_qs(parsed.query).get("v")
            video_id = values[0] if values else ""
        elif path.startswith(("/embed/", "/v/", "/shorts/", "/live/")):
            video_id = path.split("/")[2]
        else:
            video_id = ""
    elif "youtu.be" in host:
        # Short links carry the ID as the first path segment.
        video_id = path.lstrip("/").split("/", 1)[0]
    else:
        video_id = ""

    if not video_id:
        raise ValueError(invalid_link_message)
    return video_id

# Download the audio track and convert it to WAV.
def download_and_convert_audio(youtube_url):
    """Download a YouTube video's audio and convert it to a 16 kHz mono WAV.

    The first audio-only stream is downloaded to ``audio.mp4`` in the current
    directory, converted with pydub, written to ``converted_audio.wav`` and
    the intermediate download is then removed.

    Args:
        youtube_url: URL of the video, passed to ``pytube.YouTube``.

    Returns:
        Path of the converted WAV file (``"converted_audio.wav"``).

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(youtube_url)
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        # .first() returns None when the filter matches nothing.
        # Message: "Could not find an audio stream."
        raise ValueError("오디오 스트림을 찾을 수 없습니다.")
    audio_path = stream.download(filename="audio.mp4")

    wav_audio_path = "converted_audio.wav"
    try:
        audio = AudioSegment.from_file(audio_path)
        # The recognizer in this file is configured for LINEAR16 at 16000 Hz,
        # i.e. 16-bit PCM; it also expects a single channel unless a channel
        # count is configured, so normalise all three properties here.
        audio = audio.set_frame_rate(16000).set_channels(1).set_sample_width(2)
        audio.export(wav_audio_path, format="wav")
    finally:
        # Best-effort removal of the intermediate download; a failure to
        # delete must not mask the conversion result or its exception.
        try:
            os.remove(audio_path)
        except OSError:
            pass

    return wav_audio_path

# Transcribe audio to text with the Google Speech-to-Text API.
def speech_to_text(audio_path, language_code="ko-KR", chunk_seconds=55):
    """Transcribe an audio file with Google Cloud Speech-to-Text.

    Synchronous ``recognize`` requests only accept roughly one minute of
    audio, so the file is normalised to 16 kHz / mono / 16-bit PCM and sent
    in consecutive chunks of ``chunk_seconds``; the partial transcripts are
    joined with single spaces. A word that straddles a chunk boundary may be
    recognised imperfectly.

    Args:
        audio_path: Path of the audio file to transcribe (read with pydub).
        language_code: BCP-47 language tag passed to the recognizer.
        chunk_seconds: Length of each request's audio, in seconds. Must be
            positive and should stay below the synchronous request limit.

    Returns:
        The concatenated transcript, or ``""`` if nothing was recognised.

    Raises:
        ValueError: If ``chunk_seconds`` is not positive.
    """
    if chunk_seconds <= 0:
        raise ValueError("chunk_seconds must be positive")

    client = speech.SpeechClient()

    # Normalise here as well so the function does not depend on how the
    # file was produced.
    audio = (
        AudioSegment.from_file(audio_path)
        .set_frame_rate(16000)
        .set_channels(1)
        .set_sample_width(2)
    )
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code=language_code,
    )

    chunk_ms = int(chunk_seconds * 1000)
    pieces = []
    # len() of an AudioSegment is its duration in milliseconds, and slicing
    # is by milliseconds too.
    for start_ms in range(0, len(audio), chunk_ms):
        segment = audio[start_ms:start_ms + chunk_ms]
        response = client.recognize(
            config=config,
            # Headerless PCM is fine: encoding and sample rate are in config.
            audio=speech.RecognitionAudio(content=segment.raw_data),
        )
        pieces.extend(
            result.alternatives[0].transcript.strip()
            for result in response.results
            if result.alternatives
        )

    return " ".join(piece for piece in pieces if piece)

# Summarize text using the OpenAI API.
def textToSummary(text, model="gpt-3.5-turbo-instruct"):
    """Summarise ``text`` in roughly 200 words via the OpenAI Completions API.

    Args:
        text: The text to summarise.
        model: Completions-endpoint model name. The previous hard-coded
            ``text-davinci-003`` has been shut down by OpenAI;
            ``gpt-3.5-turbo-instruct`` is its documented replacement.

    Returns:
        The summary as a single line (newlines replaced by spaces, stripped).

    Raises:
        ValueError: If ``text`` is empty or only whitespace.
    """
    if not text or not text.strip():
        # Sending an empty prompt only produces an invented "summary".
        # Message: "There is no text to summarize."
        raise ValueError("요약할 텍스트가 없습니다.")

    # Only override the library's key when the variable is actually set, so
    # a key configured elsewhere is not replaced with None.
    api_key = os.getenv("OPENAI_API_KEY")
    if api_key:
        openai.api_key = api_key

    # NOTE(review): the prompt is sent whole; very long transcripts may
    # exceed the model's context window -- confirm expected input sizes.
    response = openai.Completion.create(
        model=model,
        prompt="Summarize this in 200 words or less:\n\n" + text,
        temperature=0.7,
        max_tokens=400,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=1,
    )
    return response["choices"][0]["text"].replace("\n", " ").strip()

# 전체 μš”μ•½ ν”„λ‘œμ„ΈμŠ€λ₯Ό μ²˜λ¦¬ν•˜λŠ” ν•¨μˆ˜
def summarize(url):
    """Download, transcribe and summarise a YouTube video.

    This is the Gradio callback, so it never raises: any failure is turned
    into a user-facing message that starts with "요약에 실패했습니다"
    ("Summarization failed").

    Args:
        url: The YouTube link entered by the user.

    Returns:
        The summary text, or a failure message.
    """
    if not isinstance(url, str) or not url.strip():
        # Avoid a network round trip for blank input.
        # Message: "Summarization failed: please enter a YouTube link."
        return "요약에 실패했습니다: 유튜브 링크를 입력해 주세요."

    audio_path = None
    try:
        # Download the audio track and convert it to WAV.
        audio_path = download_and_convert_audio(url.strip())
        # Speech -> text.
        transcript = speech_to_text(audio_path)
        # Text -> summary.
        return textToSummary(transcript)
    except Exception as e:  # UI boundary: report the error instead of crashing
        return f"요약에 실패했습니다: {e}"
    finally:
        # Best-effort cleanup of the converted audio so files do not pile up.
        if audio_path:
            try:
                os.remove(audio_path)
            except OSError:
                pass

# Gradio interface setup: a text input for the YouTube URL, a textbox for the
# summary. The interface is built and launched as soon as this module runs.
# The description reads: "Summarizes a YouTube video using speech
# recognition, even when the video has no captions."
description = "유튜브 동영상의 자막이 없더라도 음성 인식 기능을 사용해 요약합니다."

gr.Interface(
    fn=summarize,
    inputs="text",
    outputs="textbox",
    description=description,
).launch()