ychenNLP committed on
Commit
922050b
1 Parent(s): 0648a36

Update app.py

Browse files

streaming output + supporting long video

Files changed (1) hide show
  1. app.py +78 -15
app.py CHANGED
@@ -2,9 +2,45 @@ import gradio as gr
2
  import openai
3
  import yt_dlp
4
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  openai.api_key = os.environ['OPENAI_API_KEY']
6
 
7
  def asr(url):
 
 
8
  # download audio
9
  # Options for youtube-dl
10
  ydl_opts = {
@@ -22,21 +58,49 @@ def asr(url):
22
  audio_file_name = "audio_downloaded.{}".format(info_dict["ext"])
23
  else:
24
  return "下载音频发生错误,请确认链接再试一次。", "Error downloading the audio. Check the URL and try again."
25
- audio_file= open(audio_file_name, "rb")
26
- try:
27
- transcript = openai.Audio.transcribe("whisper-1", audio_file)
28
- except:
29
- return "视频过大(超过25mb)了,无法处理。", "The audio file is too big (25mb)."
30
- output = openai.ChatCompletion.create(
31
- model="gpt-3.5-turbo",
32
- messages=[
33
- {"role": "user", "content": "Transcript: {transcript}. \n Translate the video conversation transcript into fluent Chinese. Chinese: ".format(transcript=transcript["text"])},
34
- ]
35
- )
 
 
 
36
 
 
 
 
 
 
37
  # delete the video
38
  os.system("rm {}".format(audio_file_name))
39
- return output['choices'][0]['message']['content'], transcript["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  title = """
42
  轻声细译"""
@@ -46,7 +110,6 @@ instruction = """
46
  一键输入视频链接,轻松实现中文翻译,畅享视频无障碍沟通 <span style="color: grey;">-- powered by OpenAI Whisper & ChatGPT.</span>.<br>
47
 
48
  1.将视频链接(支持Twitter、YouTube)复制粘贴至输入框,点击提交(Submit)即可;
49
- 2.为保证翻译质量,目前仅支持处理时长不超过5分钟的短视频。
50
  </div>"""
51
  # Create a text input component
52
  text_input = gr.inputs.Textbox()
@@ -58,6 +121,6 @@ demo = gr.Interface(fn=asr,
58
  gr.outputs.Textbox(label="英文")
59
  ],
60
  title=title,
61
- description=instruction,theme='huggingface',)
62
-
63
  demo.launch()
 
2
  import openai
3
  import yt_dlp
4
  import os
5
+ import io
6
+ import tempfile
7
+ from pydub import AudioSegment
8
+
9
def split_audio(file_path, chunk_length_ms):
    """Split an audio file into consecutive chunks of a fixed length.

    Args:
        file_path: Path of the audio file; it is loaded with
            ``AudioSegment.from_file``.
        chunk_length_ms: Length of every chunk, in the same unit as
            ``len(audio)`` (milliseconds for pydub). Must be positive.

    Returns:
        A list of audio slices in playback order. The last slice may be
        shorter than ``chunk_length_ms``. The list is empty when the audio
        has zero length.

    Raises:
        ValueError: If ``chunk_length_ms`` is zero or negative.
    """
    # Validate before loading anything: with a non-positive step the start
    # offset would never reach the end of the audio and the loop below
    # would never terminate.
    if chunk_length_ms <= 0:
        raise ValueError(
            "chunk_length_ms must be positive, got {!r}".format(chunk_length_ms)
        )

    audio = AudioSegment.from_file(file_path)
    duration = len(audio)

    chunks = []
    start_time = 0
    while start_time < duration:
        # Clamp the final slice so it never reaches past the end of the audio.
        end_time = min(start_time + chunk_length_ms, duration)
        chunks.append(audio[start_time:end_time])
        start_time += chunk_length_ms
    return chunks
22
+
23
def split_string_by_tokens(text, max_tokens=500):
    """Split text into chunks of at most ``max_tokens`` words.

    "Tokens" here are whitespace-separated words as produced by
    ``str.split()``, not model tokens. Runs of whitespace are collapsed and
    the words of each chunk are re-joined with single spaces.

    Args:
        text: The string to split.
        max_tokens: Maximum number of words per chunk; a positive integer.

    Returns:
        A list of chunk strings in original order. Every chunk except
        possibly the last holds exactly ``max_tokens`` words. The list is
        empty when ``text`` contains no words.

    Raises:
        ValueError: If ``max_tokens`` is smaller than 1.
    """
    # A non-positive limit used to degrade silently into one-word chunks,
    # which turns into one downstream request per word; reject it instead.
    if max_tokens < 1:
        raise ValueError(
            "max_tokens must be at least 1, got {!r}".format(max_tokens)
        )

    words = text.split()
    return [
        ' '.join(words[start:start + max_tokens])
        for start in range(0, len(words), max_tokens)
    ]
38
+
39
  openai.api_key = os.environ['OPENAI_API_KEY']
40
 
41
  def asr(url):
42
+ # delete any previously downloaded audio files
43
+ os.system("rm *audio_download*")
44
  # download audio
45
  # Options for youtube-dl
46
  ydl_opts = {
 
58
  audio_file_name = "audio_downloaded.{}".format(info_dict["ext"])
59
  else:
60
  return "下载音频发生错误,请确认链接再试一次。", "Error downloading the audio. Check the URL and try again."
61
+
62
+ yield "下载视频完成. 开始分割视频...", ""
63
+ chunks = split_audio(audio_file_name, chunk_length_ms=30 * 1000)
64
+ transcripts = []
65
+
66
+ for idx, chunk in enumerate(chunks):
67
+
68
+ temp_file_path = None
69
+ with tempfile.NamedTemporaryFile(mode="wb", suffix=".wav", delete=False) as temp_file:
70
+ temp_file_path = temp_file.name
71
+ chunk.export(temp_file.name, format="wav")
72
+
73
+ with open(temp_file_path, "rb") as temp_file:
74
+ transcript = openai.Audio.transcribe("whisper-1", temp_file)
75
 
76
+ os.remove(temp_file_path)
77
+ transcripts.append(transcript["text"])
78
+
79
+ yield "请耐心等待语音识别完成...({}/{})".format(idx + 1, len(chunks)), " ".join(transcripts)
80
+
81
  # delete the downloaded audio file
82
  os.system("rm {}".format(audio_file_name))
83
+
84
+ translations = []
85
+ full_transcript = " ".join(transcripts)
86
+ # split into 500 tokens
87
+ transcript_chunks = split_string_by_tokens(full_transcript, max_tokens=500)
88
+ yield "语音识别完成, 开始翻译...(0/{})".format(len(transcript_chunks)), full_transcript
89
+ # split the transcript if it's too long
90
+ for idx, transcript in enumerate(transcript_chunks):
91
+ output = openai.ChatCompletion.create(
92
+ model="gpt-3.5-turbo",
93
+ messages=[
94
+ {"role": "user", "content": "Transcript: {transcript}. \n Translate the video conversation transcript into fluent Chinese. Chinese: ".format(transcript=transcript)},
95
+ ]
96
+ )
97
+ translation = output['choices'][0]['message']['content']
98
+ translations.append(translation)
99
+
100
+ yield "请耐心等候翻译:({}/{})...".format(idx+1, len(transcript_chunks)) + " ".join(translations), " ".join(transcripts)
101
+
102
+ full_translation = " ".join(translations)
103
+ yield full_translation, full_transcript
104
 
105
  title = """
106
  轻声细译"""
 
110
  一键输入视频链接,轻松实现中文翻译,畅享视频无障碍沟通 <span style="color: grey;">-- powered by OpenAI Whisper & ChatGPT.</span>.<br>
111
 
112
  1.将视频链接(支持Twitter、YouTube)复制粘贴至输入框,点击提交(Submit)即可;
 
113
  </div>"""
114
  # Create a text input component
115
  text_input = gr.inputs.Textbox()
 
121
  gr.outputs.Textbox(label="英文")
122
  ],
123
  title=title,
124
+ description=instruction,theme='huggingface')
125
+ demo.queue()
126
  demo.launch()