Spaces:

salmanmapkar
/

audio-video-transcriber

Runtime error

App Files Files Community

salmanmapkar committed on Dec 17, 2022

Commit

5da7484

1 Parent(s): 6fc22e6

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -22

app.py CHANGED Viewed

@@ -9,9 +9,23 @@ import ffmpeg
 import subprocess
 import gradio as gr
 import traceback
 # The Hugging Face access token is read from the HF_TOKEN environment variable
 # rather than being embedded in source; a token committed to a public repo is
 # compromised and must be revoked.
 pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=os.environ.get("HF_TOKEN"))
 def Transcribe(audio="temp_audio.wav"):
     def millisec(timeStr):
         spl = timeStr.split(":")
@@ -33,9 +47,9 @@ def Transcribe(audio="temp_audio.wav"):
         as_audio = AudioSegment.from_wav(audio)
         DEMO_FILE = {'uri': 'blabal', 'audio': audio}
         dz = pipeline(DEMO_FILE)
-        with open(f"diarization_{audio}.txt", "w") as text_file:
             text_file.write(str(dz))
-        dz = open(f"diarization_{audio}.txt").read().splitlines()
         dzList = []
         for l in dz:
             start, end =  tuple(re.findall('[0-9]+:[0-9]+:[0-9]+\.[0-9]+', string=l))
@@ -45,7 +59,7 @@ def Transcribe(audio="temp_audio.wav"):
             dzList.append([start, end, lex])
         sounds = spacer
         segments = []
-        dz = open(f"diarization_{audio}.txt").read().splitlines()
         for l in dz:
             start, end =  tuple(re.findall('[0-9]+:[0-9]+:[0-9]+\.[0-9]+', string=l))
             start = millisec(start)
@@ -53,7 +67,7 @@ def Transcribe(audio="temp_audio.wav"):
             segments.append(len(sounds))
             sounds = sounds.append(as_audio[start:end], crossfade=0)
             sounds = sounds.append(spacer, crossfade=0)
-        sounds.export(f"dz_{audio}.wav", format="wav")
         return f"dz_{audio}.wav", dzList, segments
     def transcribe(dz_audio):
@@ -82,11 +96,11 @@ def Transcribe(audio="temp_audio.wav"):
                   else:
                       conversation.append([dzList[i][2], c[2]])
                   #print(f"[{dzList[i][2]}] {c[2]}")
-        return ("".join([f"{speaker} --> {text}\n" for speaker, text in conversation]))
     spacermilli, spacer = preprocess(audio)
     dz_audio, dzList, segments = diarization(audio)
-    t_text = transcribe(dz_audio)
     try:
         os.remove("temp_audio.wav")
     except OSError:
@@ -99,9 +113,7 @@ def Transcribe(audio="temp_audio.wav"):
         os.remove(f"diarization_{audio}.txt")
     except OSError:
         pass
-    return t_text
-# subprocess.call(['ffmpeg', '-i', 'audio.mp3',
-#                    'audio.wav'])
 def AudioTranscribe(audio, retries=5):
     if retries:
@@ -116,9 +128,19 @@ def AudioTranscribe(audio, retries=5):
     else:
         raise gr.Error("There is some issue ith Audio Transcriber. Please try again later!")
-def VideoTranscribe(video):
-    command = f"ffmpeg -i {video} -ab 160k -ac 2 -ar 44100 -vn temp_audio.wav"
-    subprocess.call(command, shell=True)
     return Transcribe()
 def YoutubeTranscribe(URL, retries = 5):
@@ -126,10 +148,7 @@ def YoutubeTranscribe(URL, retries = 5):
         if "youtu" not in URL.lower():
             raise gr.Error(f"{URL} is not a valid youtube URL.")
         else:
-            try:
-                os.remove("temp_audio.wav")
-            except OSError:
-                pass
             ydl_opts = {
                 'format': 'bestaudio/best',
                 'outtmpl': 'temp_audio.%(ext)s',
@@ -145,10 +164,7 @@ def YoutubeTranscribe(URL, retries = 5):
                 return YoutubeTranscribe(URL, retries-1)
             stream = ffmpeg.input('temp_audio.m4a')
             stream = ffmpeg.output(stream, 'temp_audio.wav')
-            try:
-                os.remove("temp_audio.m4a")
-            except OSError:
-                pass
             return Transcribe()
     else:
         raise gr.Error(f"Unable to get video from {URL}")
@@ -170,5 +186,4 @@ at = gr.Interface(
 )
 demo = gr.TabbedInterface([ut, vt, at], ["Youtube URL", "Video", "Audio"])
-demo.launch()
-# YoutubeTranscribe('https://www.youtube.com/watch?v=GECcjrYHH8w')

 import subprocess
 import gradio as gr
 import traceback
+import json
 # The Hugging Face access token is read from the HF_TOKEN environment variable
 # rather than being embedded in source; a token committed to a public repo is
 # compromised and must be revoked.
 pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=os.environ.get("HF_TOKEN"))
# Paths of every scratch file name handed out through CreateFile.
__FILES = set()


def CreateFile(filename):
    """Register ``filename`` in the module-level ``__FILES`` set and return it.

    Nothing is created on disk here; the name is only recorded. It is
    returned unchanged so the call can wrap a path argument in place,
    e.g. ``open(CreateFile(path), "w")``.
    """
    __FILES.update((filename,))
    return filename
def RemoveFile(filename):
    """Delete ``filename`` if it is present; do nothing when it is absent.

    Args:
        filename: Path of the file to delete.

    Note:
        ``os.path`` has no ``exist`` attribute (the function is
        ``os.path.exists``), so guarding with it raised ``AttributeError``
        on every call. Attempting the removal and ignoring a missing file
        also avoids the gap between an existence check and the delete.
    """
    try:
        os.remove(filename)
    except FileNotFoundError:
        # A missing file simply means there is nothing to clean up.
        pass
def RemoveAllFiles():
    """Delete every file recorded in ``__FILES``, skipping ones already gone.

    Note:
        ``os.path`` has no ``exist`` attribute (the function is
        ``os.path.exists``), so guarding with it raised ``AttributeError``
        as soon as the registry held one entry.
    """
    for path in __FILES:
        try:
            os.remove(path)
        except FileNotFoundError:
            # Already removed (or never written); nothing to clean up.
            continue
 def Transcribe(audio="temp_audio.wav"):
     def millisec(timeStr):
         spl = timeStr.split(":")
         as_audio = AudioSegment.from_wav(audio)
         DEMO_FILE = {'uri': 'blabal', 'audio': audio}
         dz = pipeline(DEMO_FILE)
+        with open(CreateFile(f"diarization_{audio}.txt"), "w") as text_file:
             text_file.write(str(dz))
+        dz = open(CreateFile(f"diarization_{audio}.txt")).read().splitlines()
         dzList = []
         for l in dz:
             start, end =  tuple(re.findall('[0-9]+:[0-9]+:[0-9]+\.[0-9]+', string=l))
             dzList.append([start, end, lex])
         sounds = spacer
         segments = []
+        dz = open(CreateFile(f"diarization_{audio}.txt")).read().splitlines()
         for l in dz:
             start, end =  tuple(re.findall('[0-9]+:[0-9]+:[0-9]+\.[0-9]+', string=l))
             start = millisec(start)
             segments.append(len(sounds))
             sounds = sounds.append(as_audio[start:end], crossfade=0)
             sounds = sounds.append(spacer, crossfade=0)
+        sounds.export(CreateFile(f"dz_{audio}.wav"), format="wav")
         return f"dz_{audio}.wav", dzList, segments
     def transcribe(dz_audio):
                   else:
                       conversation.append([dzList[i][2], c[2]])
                   #print(f"[{dzList[i][2]}] {c[2]}")
+        return conversation, ("".join([f"{speaker} --> {text}\n" for speaker, text in conversation]))
     spacermilli, spacer = preprocess(audio)
     dz_audio, dzList, segments = diarization(audio)
+    conversation, t_text = transcribe(dz_audio)
     try:
         os.remove("temp_audio.wav")
     except OSError:
         os.remove(f"diarization_{audio}.txt")
     except OSError:
         pass
+    return t_text, json.dumps(conversation)
 def AudioTranscribe(audio, retries=5):
     if retries:
     else:
         raise gr.Error("There is some issue ith Audio Transcriber. Please try again later!")
def VideoTranscribe(video, retries=5):
    """Extract the audio track of ``video`` with ffmpeg and transcribe it.

    Args:
        video: Path of the video file passed to ``ffmpeg -i``.
        retries: Maximum number of extraction attempts.

    Returns:
        Whatever ``Transcribe()`` returns for ``temp_audio.wav``.

    Raises:
        gr.Error: If no attempt produced ``temp_audio.wav``.
    """
    for _ in range(retries):
        try:
            # Argument list instead of a shell string: a path containing
            # spaces or shell metacharacters reaches ffmpeg verbatim.
            # -y overwrites a stale temp_audio.wav; without it ffmpeg
            # refuses to write and the old audio would be transcribed.
            subprocess.call([
                "ffmpeg", "-y", "-i", video,
                "-ab", "160k", "-ac", "2", "-ar", "44100", "-vn",
                "temp_audio.wav",
            ])
        except Exception:
            # Best-effort boundary: log the failure and try again.
            traceback.print_exc()
            continue
        # os.path.exists (not the non-existent os.path.exist) confirms that
        # ffmpeg actually produced the output before transcribing.
        if os.path.exists("temp_audio.wav"):
            return Transcribe()
    raise gr.Error("There is some issue with Video Transcriber. Please try again later!")
 def YoutubeTranscribe(URL, retries = 5):
         if "youtu" not in URL.lower():
             raise gr.Error(f"{URL} is not a valid youtube URL.")
         else:
+            RemoveFile("temp_audio.wav")
             ydl_opts = {
                 'format': 'bestaudio/best',
                 'outtmpl': 'temp_audio.%(ext)s',
                 return YoutubeTranscribe(URL, retries-1)
             stream = ffmpeg.input('temp_audio.m4a')
             stream = ffmpeg.output(stream, 'temp_audio.wav')
+            RemoveFile("temp_audio.m4a")
             return Transcribe()
     else:
         raise gr.Error(f"Unable to get video from {URL}")
 )
 demo = gr.TabbedInterface([ut, vt, at], ["Youtube URL", "Video", "Audio"])
+demo.launch()