Spaces:
Runtime error
Runtime error
Commit
·
5da7484
1
Parent(s):
6fc22e6
Update app.py
Browse files
app.py
CHANGED
@@ -9,9 +9,23 @@ import ffmpeg
|
|
9 |
import subprocess
|
10 |
import gradio as gr
|
11 |
import traceback
|
|
|
12 |
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token="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")
|
|
|
13 |
|
|
|
|
|
|
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def Transcribe(audio="temp_audio.wav"):
|
16 |
def millisec(timeStr):
|
17 |
spl = timeStr.split(":")
|
@@ -33,9 +47,9 @@ def Transcribe(audio="temp_audio.wav"):
|
|
33 |
as_audio = AudioSegment.from_wav(audio)
|
34 |
DEMO_FILE = {'uri': 'blabal', 'audio': audio}
|
35 |
dz = pipeline(DEMO_FILE)
|
36 |
-
with open(f"diarization_{audio}.txt", "w") as text_file:
|
37 |
text_file.write(str(dz))
|
38 |
-
dz = open(f"diarization_{audio}.txt").read().splitlines()
|
39 |
dzList = []
|
40 |
for l in dz:
|
41 |
start, end = tuple(re.findall('[0-9]+:[0-9]+:[0-9]+\.[0-9]+', string=l))
|
@@ -45,7 +59,7 @@ def Transcribe(audio="temp_audio.wav"):
|
|
45 |
dzList.append([start, end, lex])
|
46 |
sounds = spacer
|
47 |
segments = []
|
48 |
-
dz = open(f"diarization_{audio}.txt").read().splitlines()
|
49 |
for l in dz:
|
50 |
start, end = tuple(re.findall('[0-9]+:[0-9]+:[0-9]+\.[0-9]+', string=l))
|
51 |
start = millisec(start)
|
@@ -53,7 +67,7 @@ def Transcribe(audio="temp_audio.wav"):
|
|
53 |
segments.append(len(sounds))
|
54 |
sounds = sounds.append(as_audio[start:end], crossfade=0)
|
55 |
sounds = sounds.append(spacer, crossfade=0)
|
56 |
-
sounds.export(f"dz_{audio}.wav", format="wav")
|
57 |
return f"dz_{audio}.wav", dzList, segments
|
58 |
|
59 |
def transcribe(dz_audio):
|
@@ -82,11 +96,11 @@ def Transcribe(audio="temp_audio.wav"):
|
|
82 |
else:
|
83 |
conversation.append([dzList[i][2], c[2]])
|
84 |
#print(f"[{dzList[i][2]}] {c[2]}")
|
85 |
-
return ("".join([f"{speaker} --> {text}\n" for speaker, text in conversation]))
|
86 |
|
87 |
spacermilli, spacer = preprocess(audio)
|
88 |
dz_audio, dzList, segments = diarization(audio)
|
89 |
-
t_text = transcribe(dz_audio)
|
90 |
try:
|
91 |
os.remove("temp_audio.wav")
|
92 |
except OSError:
|
@@ -99,9 +113,7 @@ def Transcribe(audio="temp_audio.wav"):
|
|
99 |
os.remove(f"diarization_{audio}.txt")
|
100 |
except OSError:
|
101 |
pass
|
102 |
-
return t_text
|
103 |
-
# subprocess.call(['ffmpeg', '-i', 'audio.mp3',
|
104 |
-
# 'audio.wav'])
|
105 |
|
106 |
def AudioTranscribe(audio, retries=5):
|
107 |
if retries:
|
@@ -116,9 +128,19 @@ def AudioTranscribe(audio, retries=5):
|
|
116 |
else:
|
117 |
raise gr.Error("There is some issue ith Audio Transcriber. Please try again later!")
|
118 |
|
119 |
-
def VideoTranscribe(video):
|
120 |
-
|
121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
return Transcribe()
|
123 |
|
124 |
def YoutubeTranscribe(URL, retries = 5):
|
@@ -126,10 +148,7 @@ def YoutubeTranscribe(URL, retries = 5):
|
|
126 |
if "youtu" not in URL.lower():
|
127 |
raise gr.Error(f"{URL} is not a valid youtube URL.")
|
128 |
else:
|
129 |
-
|
130 |
-
os.remove("temp_audio.wav")
|
131 |
-
except OSError:
|
132 |
-
pass
|
133 |
ydl_opts = {
|
134 |
'format': 'bestaudio/best',
|
135 |
'outtmpl': 'temp_audio.%(ext)s',
|
@@ -145,10 +164,7 @@ def YoutubeTranscribe(URL, retries = 5):
|
|
145 |
return YoutubeTranscribe(URL, retries-1)
|
146 |
stream = ffmpeg.input('temp_audio.m4a')
|
147 |
stream = ffmpeg.output(stream, 'temp_audio.wav')
|
148 |
-
|
149 |
-
os.remove("temp_audio.m4a")
|
150 |
-
except OSError:
|
151 |
-
pass
|
152 |
return Transcribe()
|
153 |
else:
|
154 |
raise gr.Error(f"Unable to get video from {URL}")
|
@@ -170,5 +186,4 @@ at = gr.Interface(
|
|
170 |
)
|
171 |
|
172 |
demo = gr.TabbedInterface([ut, vt, at], ["Youtube URL", "Video", "Audio"])
|
173 |
-
demo.launch()
|
174 |
-
# YoutubeTranscribe('https://www.youtube.com/watch?v=GECcjrYHH8w')
|
|
|
9 |
import subprocess
|
10 |
import gradio as gr
|
11 |
import traceback
|
12 |
+
import json
|
13 |
# SECURITY: the Hugging Face access token is read from the HF_TOKEN environment
# variable (e.g. a Space secret) instead of being committed in source. The token
# that used to be hard-coded on this line has been published in the repository
# history and should be revoked.
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=os.environ.get("HF_TOKEN"))
|
14 |
+
__FILES = set()
|
15 |
|
16 |
+
def CreateFile(filename):
    """Record `filename` in the module-level `__FILES` set and return it unchanged.

    Returning the name lets a call be wrapped directly around a path argument,
    e.g. ``open(CreateFile(path), "w")``.
    """
    __FILES.add(filename)
    return filename
|
19 |
|
20 |
+
def RemoveFile(filename):
    """Delete `filename` if it is present; do nothing when it is already gone.

    The previous implementation called ``os.path.exist``, which is not an
    attribute of ``os.path`` (the function is ``os.path.exists``), so every call
    raised AttributeError. Attempting the removal and ignoring a missing file
    also avoids the check-then-remove race.
    """
    try:
        os.remove(filename)
    except FileNotFoundError:
        # Nothing to clean up.
        pass
|
23 |
+
|
24 |
+
def RemoveAllFiles():
    """Delete every file recorded in the module-level `__FILES` set.

    Files that no longer exist are skipped. The previous implementation called
    ``os.path.exist`` (the function is ``os.path.exists``) and therefore raised
    AttributeError on the first registered file.
    """
    for file in __FILES:
        try:
            os.remove(file)
        except FileNotFoundError:
            # Already removed elsewhere; keep going with the remaining files.
            pass
|
28 |
+
|
29 |
def Transcribe(audio="temp_audio.wav"):
|
30 |
def millisec(timeStr):
|
31 |
spl = timeStr.split(":")
|
|
|
47 |
as_audio = AudioSegment.from_wav(audio)
|
48 |
DEMO_FILE = {'uri': 'blabal', 'audio': audio}
|
49 |
dz = pipeline(DEMO_FILE)
|
50 |
+
with open(CreateFile(f"diarization_{audio}.txt"), "w") as text_file:
|
51 |
text_file.write(str(dz))
|
52 |
+
dz = open(CreateFile(f"diarization_{audio}.txt")).read().splitlines()
|
53 |
dzList = []
|
54 |
for l in dz:
|
55 |
start, end = tuple(re.findall('[0-9]+:[0-9]+:[0-9]+\.[0-9]+', string=l))
|
|
|
59 |
dzList.append([start, end, lex])
|
60 |
sounds = spacer
|
61 |
segments = []
|
62 |
+
dz = open(CreateFile(f"diarization_{audio}.txt")).read().splitlines()
|
63 |
for l in dz:
|
64 |
start, end = tuple(re.findall('[0-9]+:[0-9]+:[0-9]+\.[0-9]+', string=l))
|
65 |
start = millisec(start)
|
|
|
67 |
segments.append(len(sounds))
|
68 |
sounds = sounds.append(as_audio[start:end], crossfade=0)
|
69 |
sounds = sounds.append(spacer, crossfade=0)
|
70 |
+
sounds.export(CreateFile(f"dz_{audio}.wav"), format="wav")
|
71 |
return f"dz_{audio}.wav", dzList, segments
|
72 |
|
73 |
def transcribe(dz_audio):
|
|
|
96 |
else:
|
97 |
conversation.append([dzList[i][2], c[2]])
|
98 |
#print(f"[{dzList[i][2]}] {c[2]}")
|
99 |
+
return conversation, ("".join([f"{speaker} --> {text}\n" for speaker, text in conversation]))
|
100 |
|
101 |
spacermilli, spacer = preprocess(audio)
|
102 |
dz_audio, dzList, segments = diarization(audio)
|
103 |
+
conversation, t_text = transcribe(dz_audio)
|
104 |
try:
|
105 |
os.remove("temp_audio.wav")
|
106 |
except OSError:
|
|
|
113 |
os.remove(f"diarization_{audio}.txt")
|
114 |
except OSError:
|
115 |
pass
|
116 |
+
return t_text, json.dumps(conversation)
|
|
|
|
|
117 |
|
118 |
def AudioTranscribe(audio, retries=5):
|
119 |
if retries:
|
|
|
128 |
else:
|
129 |
raise gr.Error("There is some issue ith Audio Transcriber. Please try again later!")
|
130 |
|
131 |
+
def VideoTranscribe(video, retries=5):
    """Extract the audio track of `video` to ``temp_audio.wav`` and transcribe it.

    ffmpeg is invoked to write a stereo, 44.1 kHz audio-only file named
    ``temp_audio.wav``; on success the result of ``Transcribe()`` is returned.
    If ffmpeg cannot be started or the output file is not produced, the
    conversion is retried until `retries` reaches zero.

    Args:
        video: Path of the input video file passed to ffmpeg's ``-i`` option.
        retries: Number of attempts left before giving up.

    Raises:
        gr.Error: when no attempts are left.
    """
    if not retries:
        raise gr.Error("There is some issue ith Video Transcriber. Please try again later!")
    try:
        # Argument list without a shell: the path is passed to ffmpeg verbatim,
        # so spaces or shell metacharacters in it cannot alter the command.
        command = [
            "ffmpeg", "-i", video,
            "-ab", "160k", "-ac", "2", "-ar", "44100", "-vn",
            "temp_audio.wav",
        ]
        subprocess.call(command)
    except Exception:
        traceback.print_exc()
        return VideoTranscribe(video, retries-1)
    # ffmpeg's exit status is not inspected; the presence of the output file
    # is what decides whether the conversion worked.
    if not os.path.exists("temp_audio.wav"):
        return VideoTranscribe(video, retries-1)
    return Transcribe()
|
145 |
|
146 |
def YoutubeTranscribe(URL, retries = 5):
|
|
|
148 |
if "youtu" not in URL.lower():
|
149 |
raise gr.Error(f"{URL} is not a valid youtube URL.")
|
150 |
else:
|
151 |
+
RemoveFile("temp_audio.wav")
|
|
|
|
|
|
|
152 |
ydl_opts = {
|
153 |
'format': 'bestaudio/best',
|
154 |
'outtmpl': 'temp_audio.%(ext)s',
|
|
|
164 |
return YoutubeTranscribe(URL, retries-1)
|
165 |
stream = ffmpeg.input('temp_audio.m4a')
|
166 |
stream = ffmpeg.output(stream, 'temp_audio.wav')
|
167 |
+
RemoveFile("temp_audio.m4a")
|
|
|
|
|
|
|
168 |
return Transcribe()
|
169 |
else:
|
170 |
raise gr.Error(f"Unable to get video from {URL}")
|
|
|
186 |
)
|
187 |
|
188 |
demo = gr.TabbedInterface([ut, vt, at], ["Youtube URL", "Video", "Audio"])
|
189 |
+
demo.launch()
|
|