# just4fun / app.py
# ychenNLP's picture
# Update app.py
# 4f4216e
import gradio as gr
import openai
import yt_dlp
import os
import io
import tempfile
from pydub import AudioSegment
def split_audio(file_path, chunk_length_ms):
    """Split an audio file into consecutive chunks of at most ``chunk_length_ms``.

    Args:
        file_path: Path of the audio file; passed to ``AudioSegment.from_file``.
        chunk_length_ms: Maximum length of each chunk (the caller passes
            milliseconds, e.g. ``30 * 1000``). Must be positive.

    Returns:
        A list of audio slices in playback order. The last slice may be
        shorter than ``chunk_length_ms``. Empty when the audio has length 0.

    Raises:
        ValueError: If ``chunk_length_ms`` is not positive. Without this
            check the loop below would never advance and would run forever.
    """
    # Validate before touching the file so bad arguments fail fast.
    if chunk_length_ms <= 0:
        raise ValueError(
            "chunk_length_ms must be positive, got {!r}".format(chunk_length_ms)
        )

    audio = AudioSegment.from_file(file_path)
    duration = len(audio)
    chunks = []
    start_time = 0
    while start_time < duration:
        # Clamp the final slice to the end of the audio.
        end_time = min(start_time + chunk_length_ms, duration)
        chunks.append(audio[start_time:end_time])
        start_time += chunk_length_ms
    return chunks
def split_string_by_tokens(text, max_tokens=500):
    """Break ``text`` into pieces of at most ``max_tokens`` whitespace-separated words.

    "Tokens" here are simply the items produced by ``str.split()``; runs of
    whitespace are collapsed and each piece is re-joined with single spaces.

    Args:
        text: The string to split.
        max_tokens: Number of words after which the current piece is closed.

    Returns:
        A list of strings in original order; empty if ``text`` has no words.
    """
    pieces = []
    pending = []
    for token in text.split():
        pending.append(token)
        if len(pending) < max_tokens:
            # Piece is not full yet; keep collecting words.
            continue
        pieces.append(' '.join(pending))
        pending = []
    # Flush whatever is left over as the final, possibly shorter, piece.
    if pending:
        pieces.append(' '.join(pending))
    return pieces
# Configure the OpenAI client from the environment at import time; a missing
# OPENAI_API_KEY variable raises KeyError here, before the UI is built.
openai.api_key = os.environ['OPENAI_API_KEY']
def _remove_quietly(path):
    """Best-effort delete of ``path``; a failure to delete is not fatal."""
    try:
        os.remove(path)
    except OSError:
        # Cleanup only: the file may already be gone or may not be removable.
        pass


def _transcribe_chunk(chunk):
    """Export one audio chunk to a temporary WAV file and return its Whisper text."""
    # Reserve a unique path, then close the handle immediately so the exporter
    # can write to the file by name.
    with tempfile.NamedTemporaryFile(mode="wb", suffix=".wav", delete=False) as temp_file:
        temp_file_path = temp_file.name
    try:
        chunk.export(temp_file_path, format="wav")
        with open(temp_file_path, "rb") as audio_fh:
            transcript = openai.Audio.transcribe("whisper-1", audio_fh)
    finally:
        # Remove the temp file even when export or transcription fails.
        _remove_quietly(temp_file_path)
    return transcript["text"]


def asr(url):
    """Download a video's audio, transcribe it, and translate it to Chinese.

    This is a generator: each yielded value is a 2-tuple of strings,
    ``(status_or_translation, transcript_so_far)``, matching the two output
    text boxes of the interface. Progress messages are yielded while work is
    ongoing; the final yield is ``(full_translation, full_transcript)``.

    Args:
        url: Link to the video whose audio should be processed.

    Yields:
        ``(str, str)`` tuples as described above. If the download fails, a
        single error tuple is yielded and the generator stops.
    """
    import glob  # local import: only needed for the cleanup step below

    # Remove leftovers from a previous run without going through a shell.
    for stale_path in glob.glob("*audio_download*"):
        _remove_quietly(stale_path)

    # Options for yt-dlp
    # NOTE(review): 'no_continue' looks like the CLI flag name; the Python API
    # option may be 'continuedl' -- confirm against the yt-dlp documentation.
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'audio_downloaded.%(ext)s',
        'no_continue': True,
    }

    # Download the audio; a failed download is reported to the user instead
    # of propagating as an unhandled exception.
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
    except yt_dlp.utils.DownloadError:
        info_dict = None

    if info_dict is None:
        # A generator must *yield* the message for it to be shown; a plain
        # ``return value`` would be silently discarded by the consumer.
        yield "下载音频发生错误,请确认链接再试一次。", "Error downloading the audio. Check the URL and try again."
        return

    audio_file_name = "audio_downloaded.{}".format(info_dict["ext"])
    yield "下载视频完成. 开始分割视频...", ""

    transcripts = []
    try:
        chunks = split_audio(audio_file_name, chunk_length_ms=30 * 1000)
        for idx, chunk in enumerate(chunks):
            transcripts.append(_transcribe_chunk(chunk))
            yield "请耐心等待语音识别完成...({}/{})".format(idx + 1, len(chunks)), " ".join(transcripts)
    finally:
        # Delete the downloaded audio whether or not transcription succeeded.
        _remove_quietly(audio_file_name)

    translations = []
    full_transcript = " ".join(transcripts)
    # Split the transcript into pieces of 500 words so each request stays small.
    transcript_chunks = split_string_by_tokens(full_transcript, max_tokens=500)
    yield "语音识别完成, 开始翻译...(0/{})".format(len(transcript_chunks)), full_transcript

    for idx, transcript in enumerate(transcript_chunks):
        output = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user",
                 "content": "Transcript: {transcript}. \n Translate the video conversation transcript into fluent Chinese. Chinese: ".format(transcript=transcript)},
            ],
            stream=True,
        )
        for event in output:
            # Streamed events may carry no delta or no content; treat both as
            # an empty piece instead of failing on a missing key.
            delta = event["choices"][0].get("delta") or {}
            translations.append(delta.get("content") or "")
            yield "请耐心等候翻译:({}/{})...".format(idx+1, len(transcript_chunks)) + "".join(translations), full_transcript

    full_translation = "".join(translations)
    yield full_translation, full_transcript
# Title passed to the Gradio interface.
title = """
轻声细译"""
# HTML usage instructions passed to the interface as its description.
instruction = """
<div style="border: 2px solid #000; padding: 10px; border-radius: 5px;">
视频翻译 (video-translation):输入视频链接,进行中文翻译 <span style="color: grey;">-- powered by OpenAI Whisper & ChatGPT.</span>.<br>
1.将视频链接(支持Twitter、YouTube)复制粘贴至输入框,点击提交(Submit)即可;
</div>"""
# Build the UI: one URL text box in, two text boxes out (translation and
# transcript). gr.Textbox is used for both roles because the gr.inputs /
# gr.outputs namespaces are deprecated.
demo = gr.Interface(
    fn=asr,
    inputs=gr.Textbox(label="粘贴视频链接"),
    outputs=[
        gr.Textbox(label="翻译"),
        gr.Textbox(label="音频转录"),
    ],
    title=title,
    description=instruction,
    theme="JohnSmith9982/small_and_pretty",
)
# asr is a generator that yields progress updates; the queue is enabled
# before launching so those intermediate results can be delivered.
demo.queue()
demo.launch()