Spaces:

ychenNLP
/

just4fun

Runtime error

App Files Community

ychenNLP committed on Apr 28, 2023

Commit

922050b

•

1 Parent(s): 0648a36

Update app.py

Browse files

streaming output + supporting long video

Files changed (1) hide show

app.py +78 -15

app.py CHANGED Viewed

@@ -2,9 +2,45 @@ import gradio as gr
 import openai
 import yt_dlp
 import os
 openai.api_key = os.environ['OPENAI_API_KEY']
 def asr(url):
     # download audio
     # Options for youtube-dl
     ydl_opts = {
@@ -22,21 +58,49 @@ def asr(url):
         audio_file_name = "audio_downloaded.{}".format(info_dict["ext"])
     else:
         return "下载音频发生错误，请确认链接再试一次。", "Error downloading the audio. Check the URL and try again."
-    audio_file= open(audio_file_name, "rb")
-    try:
-        transcript = openai.Audio.transcribe("whisper-1", audio_file)
-    except:
-        return "视频过大（超过25mb）了，无法处理。", "The audio file is too big (25mb)."
-    output = openai.ChatCompletion.create(
-    model="gpt-3.5-turbo",
-    messages=[
-            {"role": "user", "content": "Transcript: {transcript}. \n Translate the video conversation transcript into fluent Chinese. Chinese: ".format(transcript=transcript["text"])},
-        ]
-    )
     # delete the video
     os.system("rm {}".format(audio_file_name))
-    return output['choices'][0]['message']['content'], transcript["text"]
 title = """
 轻声细译"""
@@ -46,7 +110,6 @@ instruction = """
 一键输入视频链接，轻松实现中文翻译，畅享视频无障碍沟通 <span style="color: grey;">-- powered by OpenAI Whisper & ChatGPT.</span>.<br>
 1.将视频链接（支持Twitter、YouTube）复制粘贴至输入框，点击提交（Submit）即可;
-2.为保证翻译质量，目前仅支持处理时长不超过5分钟的短视频。
 </div>"""
 # Create a text input component
 text_input = gr.inputs.Textbox()
@@ -58,6 +121,6 @@ demo = gr.Interface(fn=asr,
                         gr.outputs.Textbox(label="英文")
                     ],
                     title=title,
-                    description=instruction,theme='huggingface',)
 demo.launch()

 import openai
 import yt_dlp
 import os
+import io
+import tempfile
+from pydub import AudioSegment
def split_audio(file_path, chunk_length_ms):
    """Split an audio file into consecutive chunks of a fixed length.

    Args:
        file_path: Path of the audio file; handed to
            ``AudioSegment.from_file``.
        chunk_length_ms: Length of each chunk, in milliseconds. Must be
            greater than zero.

    Returns:
        A list of slices of the loaded audio, in playback order. Each slice
        covers ``chunk_length_ms`` except the last one, which covers
        whatever is left. The list is empty when the audio has zero length.

    Raises:
        ValueError: If ``chunk_length_ms`` is zero or negative.
    """
    # A non-positive step never advances the cursor below, which would make
    # the loop run forever; fail fast before doing any decoding work.
    if chunk_length_ms <= 0:
        raise ValueError(
            "chunk_length_ms must be positive, got {!r}".format(chunk_length_ms)
        )

    audio = AudioSegment.from_file(file_path)
    # len() of the segment is compared against millisecond offsets below.
    duration = len(audio)

    chunks = []
    start_time = 0
    while start_time < duration:
        # Clamp the final chunk so it never reaches past the end of the audio.
        end_time = min(start_time + chunk_length_ms, duration)
        chunks.append(audio[start_time:end_time])
        start_time += chunk_length_ms
    return chunks
def split_string_by_tokens(text, max_tokens=500):
    """Split text into chunks of at most ``max_tokens`` words each.

    No tokenizer is involved despite the name: a "token" here is one item of
    ``text.split()``, i.e. a run of non-whitespace characters. Words inside
    a chunk are re-joined with a single space, so the original whitespace
    (newlines, tabs, repeated spaces) is not preserved.

    Args:
        text: The string to split.
        max_tokens: Maximum number of words per chunk. Must be a positive
            integer.

    Returns:
        A list of strings in original order. Every chunk holds exactly
        ``max_tokens`` words except possibly the last. The list is empty
        when ``text`` contains no words.

    Raises:
        ValueError: If ``max_tokens`` is less than 1.
    """
    # Values below 1 used to degrade silently into one word per chunk;
    # surface the misuse instead of hiding it.
    if max_tokens < 1:
        raise ValueError(
            "max_tokens must be at least 1, got {!r}".format(max_tokens)
        )

    words = text.split()
    return [
        ' '.join(words[start:start + max_tokens])
        for start in range(0, len(words), max_tokens)
    ]
 openai.api_key = os.environ['OPENAI_API_KEY']
 def asr(url):
+    # delete the video
+    os.system("rm *audio_download*")
     # download audio
     # Options for youtube-dl
     ydl_opts = {
         audio_file_name = "audio_downloaded.{}".format(info_dict["ext"])
     else:
         return "下载音频发生错误，请确认链接再试一次。", "Error downloading the audio. Check the URL and try again."
+    yield "下载视频完成. 开始分割视频...", ""
+    chunks = split_audio(audio_file_name, chunk_length_ms=30 * 1000)
+    transcripts = []
+    for idx, chunk in enumerate(chunks):
+        temp_file_path = None
+        with tempfile.NamedTemporaryFile(mode="wb", suffix=".wav", delete=False) as temp_file:
+            temp_file_path = temp_file.name
+            chunk.export(temp_file.name, format="wav")
+        with open(temp_file_path, "rb") as temp_file:
+            transcript = openai.Audio.transcribe("whisper-1", temp_file)
+        os.remove(temp_file_path)
+        transcripts.append(transcript["text"])
+        yield "请耐心等待语音识别完成...({}/{})".format(idx + 1, len(chunks)), " ".join(transcripts)
     # delete the video
     os.system("rm {}".format(audio_file_name))
+    translations = []
+    full_transcript = " ".join(transcripts)
+    # split into 500 tokens
+    transcript_chunks = split_string_by_tokens(full_transcript, max_tokens=500)
+    yield "语音识别完成, 开始翻译...(0/{})".format(len(transcript_chunks)), full_transcript
+    # split transcripts if its too long
+    for idx, transcript in enumerate(transcript_chunks):
+        output = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[
+                {"role": "user", "content": "Transcript: {transcript}. \n Translate the video conversation transcript into fluent Chinese. Chinese: ".format(transcript=transcript)},
+            ]
+        )
+        translation = output['choices'][0]['message']['content']
+        translations.append(translation)
+        yield "请耐心等候翻译：({}/{})...".format(idx+1, len(transcript_chunks)) + " ".join(translations), " ".join(transcripts)
+    full_translation = " ".join(translations)
+    yield full_translation, full_transcript
 title = """
 轻声细译"""
 一键输入视频链接，轻松实现中文翻译，畅享视频无障碍沟通 <span style="color: grey;">-- powered by OpenAI Whisper & ChatGPT.</span>.<br>
 1.将视频链接（支持Twitter、YouTube）复制粘贴至输入框，点击提交（Submit）即可;
 </div>"""
 # Create a text input component
 text_input = gr.inputs.Textbox()
                         gr.outputs.Textbox(label="英文")
                     ],
                     title=title,
+                    description=instruction,theme='huggingface')
+demo.queue()
 demo.launch()