Commit 003983b · thealphamerc committed
1 parent: 2d6bfef

Added examples
Files changed:
- .gitattributes +1 -0
- .gitignore +3 -1
- app.py +53 -67
- input/example-1.wav +3 -0
- input/example-2.wav +3 -0
.gitattributes
ADDED
@@ -0,0 +1 @@
+*.wav filter=lfs diff=lfs merge=lfs -text
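Note: this attribute routes every *.wav file through Git LFS, which is why the two example clips added at the bottom of this commit appear in the diff as three-line LFS pointer files (version, oid, size) rather than raw audio bytes. As a minimal sketch (not part of this commit; the helper name is hypothetical), such a pointer can be parsed like this:

def parse_lfs_pointer(path):
    # Read the key/value lines of a Git LFS pointer file:
    # "version ...", "oid sha256:...", "size ...".
    fields = {}
    with open(path, encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            key, _, value = line.strip().partition(' ')
            fields[key] = value
    return fields

# e.g. parse_lfs_pointer('input/example-1.wav') on the checked-in pointer
# would yield {'version': 'https://git-lfs.github.com/spec/v1',
#              'oid': 'sha256:971b...', 'size': '3249924'}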
.gitignore
CHANGED
@@ -1 +1,3 @@
-*.srt
+*.srt
+*.mp4
+*.raw
app.py
CHANGED
@@ -1,13 +1,13 @@
 import os
-import logging
 os.system("pip install git+https://github.com/openai/whisper.git")
-import gradio as gr
-from subprocess import call
-import whisper
-from datetime import timedelta
-from pytube import YouTube
-import pandas as pd
 import pysrt
+import pandas as pd
+from pytube import YouTube
+from datetime import timedelta
+import whisper
+from subprocess import call
+import gradio as gr
+import logging
 # from transformers.pipelines.audio_utils import ffmpeg_read
 
 
@@ -21,11 +21,7 @@ ch.setFormatter(formatter)
 logger.addHandler(ch)
 
 
-BATCH_SIZE = 16
-CHUNK_LENGTH_S = 30
-NUM_PROC = 8
 FILE_LIMIT_MB = 1000
-YT_ATTEMPT_LIMIT = 3
 
 
 def run_cmd(command):
@@ -44,7 +40,6 @@ def inference(text):
 
 
 baseModel = whisper.load_model("base")
-smallModel = whisper.load_model("small")
 
 
 df_init = pd.DataFrame(columns=['start', 'end', 'text'])
@@ -52,35 +47,45 @@ transcription_df = gr.DataFrame(value=df_init, label="Transcription dataframe",
     0, "dynamic"), max_rows=30, wrap=True, overflow_row_behaviour='paginate')
 
 
-inputs = gr.components.Audio(type="filepath", label="Add audio file")
+inputs = [gr.components.Audio(type="filepath", label="Add audio file"), gr.inputs.Audio(source="microphone",
+                                                                                        optional=True, type="filepath"),]
 outputs = [gr.components.Textbox(), transcription_df]
 title = "Transcribe multi-lingual audio clips"
-description = "An example of using
+description = "An example of using OpenAi whisper to generate transcriptions for audio clips."
 article = ""
-
-    [""]
+audio_examples = [
+    ["input/example-1.wav"],
+    ["input/example-2.wav"],
 ]
 
 
-def transcribe(inputs):
-
-
-
+def transcribe(inputs, microphone):
+    if (microphone is not None):
+        inputs = microphone
+
     if inputs is None:
         logger.warning("No audio file")
-        return "
+        return [f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB.", df_init]
     file_size_mb = os.stat(inputs).st_size / (1024 * 1024)
+
+    # --------------------------------------------------- Check the file size ---------------------------------------------------
     if file_size_mb > FILE_LIMIT_MB:
         logger.warning("Max file size exceeded")
-
+        df = pd.DataFrame(columns=['start', 'end', 'text'])
+        return [f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB.", df_init]
+
+    # --------------------------------------------------- Transcribe the audio ---------------------------------------------------
+    result = baseModel.transcribe(audio=inputs, language='english',
+                                  verbose=False)
+    srtFilename = os.path.join("output/SrtFiles", inputs.split(
+        '/')[-1].split('.')[0]+'.srt')
 
-    #
-
+    # --------------------------------------------------- Clear the file ---------------------------------------------------
+    with open(srtFilename, 'w', encoding='utf-8') as srtFile:
+        srtFile.seek(0)
+        srtFile.truncate()
 
-    #
-    result = smallModel.transcribe(audio=inputs, language='english',
-                                   verbose=False)
-    # ---------------------------------------------------
+    # --------------------------------------------------- Write the file ---------------------------------------------------
     segments = result['segments']
     for segment in segments:
         startTime = str(0)+str(timedelta(seconds=int(segment['start'])))+',000'
@@ -89,17 +94,11 @@ def transcribe(inputs):
         segmentId = segment['id']+1
         segment = f"{segmentId}\n{startTime} --> {endTime}\n{text[1:] if text[0] is ' ' else text}\n\n"
 
-        srtFilename = os.path.join("output/SrtFiles", inputs.split(
-            '/')[-1].split('.')[0]+'.srt')
         with open(srtFilename, 'a', encoding='utf-8') as srtFile:
            srtFile.write(segment)
 
-
-            '/')[-1].split('.')[0]+'.srt')
-        with open(rawFilename, 'a', encoding='utf-8') as srtFile:
-            srtFile.write(segment)
+    # ------------------------------------------- Read the file and Prepare to display ---------------------------------------
     try:
-
        srt_path = srtFilename
        df = pd.DataFrame(columns=['start', 'end', 'text'])
        subs = pysrt.open(srt_path)
@@ -129,7 +128,7 @@ def transcribe(inputs):
        df = pd.DataFrame(objects, columns=['start', 'end', 'text'])
    except Exception as e:
        print('Error: ', e)
-        df =
+        df = df_init
 
    return [result["text"], df]
 
@@ -205,23 +204,24 @@ audio_chunked = gr.Interface(
    title=title,
    description=description,
    article=article,
+    examples=audio_examples,
 )
 
-microphone_chunked = gr.Interface(
-    fn=transcribe,
-    inputs=[
-        gr.inputs.Audio(source="microphone",
-                        optional=True, type="filepath"),
-    ],
-    outputs=[
-        gr.outputs.Textbox(label="Transcription").style(
-            show_copy_button=True),
-    ],
-    allow_flagging="never",
-    title=title,
-    description=description,
-    article=article,
-)
+# microphone_chunked = gr.Interface(
+#     fn=transcribe,
+#     inputs=[
+#         gr.inputs.Audio(source="microphone",
+#                         optional=True, type="filepath"),
+#     ],
+#     outputs=[
+#         gr.outputs.Textbox(label="Transcription").style(
+#             show_copy_button=True),
+#     ],
+#     allow_flagging="never",
+#     title=title,
+#     description=description,
+#     article=article,
+# )
 youtube_chunked = gr.Interface(
    fn=youtube_transcript,
    inputs=[
@@ -248,21 +248,7 @@ youtube_chunked = gr.Interface(
 
 demo = gr.Blocks()
 with demo:
-    gr.TabbedInterface([audio_chunked, youtube_chunked
-        "Audio File", "Youtube"
+    gr.TabbedInterface([audio_chunked, youtube_chunked], [
+        "Audio File", "Youtube"])
    demo.queue(concurrency_count=1, max_size=5)
    demo.launch(show_api=False)
-
-
-# gr.Interface(
-#     inference,
-#     inputs,
-#     outputs,
-#     verbose=True,
-#     title=title,
-#     description=description,
-#     article=article,
-#     examples=examples,
-#     enable_queue=True,
-
-# ).launch(share=True, debug=True)
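Note: two details of the rewritten transcribe() are worth flagging. The SRT timestamp is built as str(0)+str(timedelta(seconds=int(segment['start'])))+',000', which truncates every segment boundary to whole seconds and hard-codes the millisecond field; and the segment f-string trims a leading space with `text[0] is ' '`, an identity comparison that only happens to work because CPython interns one-character strings (`==` or `lstrip()` is the reliable form). A self-contained sketch of the timestamp logic, with a variant that keeps milliseconds (neither function is part of the commit):

from datetime import timedelta

def commit_style_time(seconds):
    # As in the diff: '0' + str(timedelta(...)) + ',000'
    # commit_style_time(5.48) -> '00:00:05,000' (milliseconds dropped)
    return str(0) + str(timedelta(seconds=int(seconds))) + ',000'

def srt_time(seconds):
    # Variant that preserves milliseconds:
    # srt_time(5.48) -> '00:00:05,480'; srt_time(3661.25) -> '01:01:01,250'
    total_ms = int(round(seconds * 1000))
    s, ms = divmod(total_ms, 1000)
    h, rem = divmod(s, 3600)
    m, s = divmod(rem, 60)
    return f'{h:02d}:{m:02d}:{s:02d},{ms:03d}'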
input/example-1.wav
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:971b4163670445c415c6b0fb6813c38093409ecac2f6b4d429ae3574d24ad470
+size 3249924
input/example-2.wav
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c17c6659d9252782e9481764a6ce447bac29ff874cc5c67f9bbf703b7f13743
+size 692524
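Note: in these pointer files the oid is the SHA-256 of the real audio bytes and size is the byte count, so a fetched clip can be checked against the pointer. A small sketch (hypothetical helper, not in the repo):

import hashlib
import os

def matches_pointer(audio_path, oid, size):
    # Compare a local file against the 'oid'/'size' fields of an LFS pointer.
    with open(audio_path, 'rb') as f:
        digest = hashlib.sha256(f.read()).hexdigest()  # whole-file read is fine for clips this small
    return 'sha256:' + digest == oid and os.path.getsize(audio_path) == int(size)

# e.g. matches_pointer('example-1.wav',
#                      'sha256:971b4163670445c415c6b0fb6813c38093409ecac2f6b4d429ae3574d24ad470',
#                      '3249924')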