# YouTube video summarizer (HuggingFace Space) — transcribes a video's audio
# with Google Speech-to-Text and summarizes the transcript with OpenAI.
from pytube import YouTube
from google.cloud import speech_v1p1beta1 as speech
from pydub import AudioSegment # μ€λμ€ λ³νμ μν λΌμ΄λΈλ¬λ¦¬
import io
import openai
import os
import gradio as gr
# Extract the YouTube video ID from a URL
def get_yt_video_id(url):
    """Return the video ID embedded in a YouTube URL.

    Supports watch links (?v=...), /embed/ and /v/ paths, and youtu.be
    short links. Scheme-less input starting with 'youtu' or 'www' is
    accepted by prepending 'http://'.

    Raises:
        ValueError: if the URL is not a recognized YouTube link.
    """
    from urllib.parse import urlparse, parse_qs

    # Without a scheme, urlparse would put the host into .path.
    if url.startswith(('youtu', 'www')):
        url = 'http://' + url
    query = urlparse(url)
    # hostname is None for malformed input; 'in None' would raise TypeError.
    hostname = query.hostname or ''
    if 'youtube' in hostname:
        if query.path == '/watch':
            return parse_qs(query.query)['v'][0]
        if query.path.startswith(('/embed/', '/v/')):
            return query.path.split('/')[2]
        # Previously fell through and returned None silently; an unknown
        # youtube.com path is invalid input, so raise like the final branch.
        raise ValueError("μ ν¨ν μ νλΈ λ§ν¬κ° μλλλ€.")
    if 'youtu.be' in hostname:
        return query.path[1:]
    raise ValueError("μ ν¨ν μ νλΈ λ§ν¬κ° μλλλ€.")
# Download a YouTube video's audio track and re-encode it as WAV
def download_and_convert_audio(youtube_url):
    """Fetch the audio-only stream of *youtube_url* and save it as a
    16 kHz WAV file.

    Returns:
        Path of the converted WAV file ("converted_audio.wav").
    """
    video = YouTube(youtube_url)
    audio_stream = video.streams.filter(only_audio=True).first()
    downloaded_path = audio_stream.download(filename="audio.mp4")
    # Re-encode at 16000 Hz — the sample rate speech_to_text expects.
    segment = AudioSegment.from_file(downloaded_path)
    converted_path = "converted_audio.wav"
    segment.set_frame_rate(16000).export(converted_path, format="wav")
    return converted_path
# Convert audio to text using the Google Speech-to-Text API
def speech_to_text(audio_path):
    """Transcribe a 16 kHz LINEAR16 WAV file to Korean text.

    Sends the whole file to Google Speech-to-Text in one synchronous
    request and joins the top alternative of every result segment.
    """
    recognizer = speech.SpeechClient()
    with open(audio_path, "rb") as audio_file:
        audio_bytes = audio_file.read()
    recognition_audio = speech.RecognitionAudio(content=audio_bytes)
    recognition_config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,  # must match the WAV produced upstream
        language_code="ko-KR",  # Korean recognition
    )
    response = recognizer.recognize(config=recognition_config, audio=recognition_audio)
    pieces = (result.alternatives[0].transcript for result in response.results)
    return " ".join(pieces).strip()
# Summarize text using the OpenAI API
def textToSummary(text):
    """Summarize *text* in 200 words or less via the OpenAI completions API.

    The API key is read from the OPENAI_API_KEY environment variable.
    NOTE(review): ``openai.Completion`` and ``text-davinci-003`` are
    deprecated/retired in newer openai releases — confirm the pinned
    library version, or migrate to the chat completions API.
    """
    openai.api_key = os.getenv("OPENAI_API_KEY")
    completion = openai.Completion.create(
        model="text-davinci-003",
        prompt="Summarize this in 200 words or less:\n\n" + text,
        temperature=0.7,
        max_tokens=400,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=1,
    )
    summary = completion["choices"][0]["text"]
    # Collapse the model's line breaks into a single-line summary.
    return summary.replace("\n", " ").strip()
# Run the whole summarization pipeline for one URL
def summarize(url):
    """Download, transcribe and summarize a YouTube video.

    Returns the summary string on success; on any failure returns a
    (Korean) error message so Gradio can display it as the output.
    """
    try:
        # Download the YouTube audio and convert it to WAV.
        wav_path = download_and_convert_audio(url)
        # Speech recognition: audio -> transcript.
        transcript = speech_to_text(wav_path)
        # Transcript -> summary.
        return textToSummary(transcript)
    except Exception as e:
        # Surface the failure as the interface's output text.
        return f"μμ½μ μ€ν¨νμ΅λλ€: {str(e)}"
# Gradio interface setup: one text input (URL) -> one textbox (summary)
description = "μ νλΈ λμμμ μλ§μ΄ μλλΌλ μμ± μΈμ κΈ°λ₯μ μ¬μ©ν΄ μμ½ν©λλ€."
demo = gr.Interface(
    fn=summarize,
    inputs="text",
    outputs="textbox",
    description=description,
)
demo.launch()